| #!/bin/bash |
| # |
| # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
# Abort the script as soon as any unhandled command fails.
set -e

# This hook is meant for lab test machines only; everywhere else it is a
# silent no-op. See autotest/server/hosts/site_host.py for more information.
[ -f /mnt/stateful_partition/.labmachine ] || exit 0

# Whitespace-separated list of drivers whose eth* devices are only counted as
# Ethernet interfaces while their link is up.
NON_ETHERNET_DRIVERS="cdc_ether"
| |
# Reports a critical message.
#
# Critical messages should be sent to /var/log/messages, so the message is
# handed to logger(1) tagged with this script's name. It is also echoed to
# stdout, which is how ordinary messages are harvested by recover_duts.py.
#
# Arguments: $@ - the message words.
#
# TODO(tbroch) Relocate this to common hook library if/when there's more than
# one hook.
critical_msg() {
  echo "$@"
  # ${0##*/} is the basename of $0; unlike an unquoted $(basename $0) it is
  # not word-split when the script path contains whitespace.
  logger -t "${0##*/}" -- "$@"
}
| |
# Prints the default gateway.
#
# Asks the kernel which route it would use to reach 1.0.0.0 and prints the
# address that follows the "via" keyword. Nothing is printed when the lookup
# fails or when the route has no "via" hop (an on-link route); in the latter
# case the third field of the output is the device name, which must not be
# reported as a gateway.
#
# Returns 0 in all of these cases so callers running under "set -e" are not
# aborted by a missing route.
get_default_gateway() {
  local ip_route

  # Declared separately so the exit status of "ip" is not masked by "local".
  ip_route="$(ip route get 1.0.0.0)" || return 0

  printf '%s\n' "${ip_route}" |
    awk '{
      for (i = 1; i < NF; i++) {
        if ($i == "via") {
          print $(i + 1)
          exit
        }
      }
    }'
}
| |
# Tells whether a driver name is listed in NON_ETHERNET_DRIVERS.
# $1 - driver name to look up.
# Returns 0 when the name equals one of the listed entries, or 1 otherwise.
is_non_ethernet_driver() {
  local wanted="$1"
  local candidate

  # The list is expanded unquoted on purpose so that it splits into words.
  for candidate in ${NON_ETHERNET_DRIVERS}; do
    [ "${candidate}" != "${wanted}" ] || return 0
  done
  return 1
}
| |
# Tells whether a network device has its link connected.
# $1 - device name, e.g. eth0.
# Returns 0 if the "ip link show" output for the device contains LOWER_UP,
# or 1 otherwise (including when "ip" produces no output).
is_connected() {
  local link_info

  # The exit status of "ip" is ignored; only its output decides the result.
  link_info="$(ip link show "$1")" || true
  [[ "${link_info}" == *LOWER_UP* ]]
}
| |
# Shows the list of Ethernet interfaces found on the system, one per line.
#
# Only /sys/class/net/eth* entries that have a device/driver link are
# considered. Such a device is printed when its driver is not listed in
# NON_ETHERNET_DRIVERS, or when it is listed but the link is connected.
find_ethernet_interfaces() {
  local device_path
  local driver_path
  local device
  local driver

  for device_path in /sys/class/net/eth*; do
    driver_path="${device_path}/device/driver"
    # Also skips the literal, unexpanded pattern when no eth* device exists.
    [ -e "${driver_path}" ] || continue

    device="${device_path##*/}"
    # Both substitutions are quoted so the resolved path is never word-split.
    driver="$(basename "$(readlink -f "${driver_path}")")"
    if ! is_non_ethernet_driver "${driver}" || is_connected "${device}"; then
      echo "${device}"
    fi
  done
}
| |
# Shows the list of USB-Ethernet interfaces found on the system, one per line.
#
# An eth* device counts as USB when the fully resolved path of its
# /sys/class/net entry contains "usb".
find_usb_ethernet_interfaces() {
  # Kept local so the loop variable does not leak into the caller's scope.
  local device_path

  for device_path in /sys/class/net/eth*; do
    if readlink -f "${device_path}" | grep -q usb; then
      basename "${device_path}"
    fi
  done
}
| |
# Pings the given IP address through all wired Ethernet devices in turn.
# $1 - IP address to ping.
# Returns 0 as soon as the address answers through one interface, or 1 if it
# answers through none (including when no interface is found).
do_ping() {
  local ip_addr="$1"
  # Kept local so the loop variable does not leak into the caller's scope.
  local eth

  # The interface list is split on whitespace: one device name per word.
  for eth in $(find_ethernet_interfaces); do
    ping -q -I "${eth}" -c 9 "${ip_addr}" && return 0
  done
  return 1
}
| |
# Prints the remote IP address of the first established SSH connection.
#
# Only TCP connections whose local port is exactly 22 and whose state is
# ESTABLISHED are considered, so listening sockets, other ports that merely
# start with "22" and outgoing SSH sessions are ignored. The port is stripped
# from the remote endpoint, as is the "::ffff:" prefix of an IPv4-mapped
# address. Prints nothing when there is no such connection.
find_ssh_client() {
  netstat -lanp |
    awk '$1 ~ /^tcp/ && $4 ~ /:22$/ && $6 == "ESTABLISHED" {
      addr = $5
      sub(/:[0-9]+$/, "", addr)
      sub(/^::ffff:/, "", addr)
      print addr
      exit
    }'
}
| |
# Checks whether the controlling side of the network is reachable.
#
# Tries the default gateway first and then a connected SSH client (our
# autotest server); a candidate is pinged only when its lookup printed
# something. Returns 0 on the first successful ping, or 1 if none succeeded.
ping_controlling_server() {
  local lookup
  local target

  for lookup in get_default_gateway find_ssh_client; do
    # A failing lookup is not fatal; whatever it printed is still used.
    target="$("${lookup}")" || true
    [ -n "${target}" ] || continue
    # Unquoted on purpose: do_ping only looks at the first resulting word.
    if do_ping ${target}; then
      return 0
    fi
  done
  return 1
}
| |
# Reloads every USB-Ethernet interface through reload_network_device.
# Returns 0 if at least one interface was handled, or 1 if none was found.
reload_usb_ethernet_devices() {
  local iface
  local handled=0

  for iface in $(find_usb_ethernet_interfaces); do
    echo "Reload interface ${iface}"
    reload_network_device "${iface}"
    handled=$(( handled + 1 ))
  done

  # Success means that some action was actually performed.
  [ "${handled}" -gt 0 ]
}
| |
# Bounces every Ethernet interface by bringing it down and back up.
# Returns 0 if at least one interface was bounced, or 1 if none was found.
toggle_ethernet_interfaces() {
  local iface
  local state
  local bounced=0

  for iface in $(find_ethernet_interfaces); do
    echo "Bounce interface ${iface}"
    for state in down up; do
      ifconfig "${iface}" "${state}"
    done
    bounced=$(( bounced + 1 ))
  done

  # Success means that some action was actually performed.
  [ "${bounced}" -gt 0 ]
}
| |
# Restarts the shill job: stops it if possible, then starts it.
# Returns the exit status of "initctl start shill".
restart_connection_manager() {
  # A failed stop is only reported; the start is attempted regardless.
  if ! initctl stop shill; then
    echo "Shill was not running."
  fi
  initctl start shill
}
| |
# Restarts shill only when "initctl status shill" does not report it running.
# Returns 1 when shill was already running (nothing was done); otherwise
# returns the status of restart_connection_manager.
ensure_connection_manager_is_running() {
  local job_state

  # The exit status of initctl is ignored; only its output is inspected.
  job_state="$(initctl status shill)" || true
  case "${job_state}" in
    *running*)
      return 1
      ;;
  esac
  restart_connection_manager
}
| |
# Tries each recovery method in order until the controlling server answers.
#
# Returns 0 as soon as a ping succeeds after some method has run, or 1 when
# every method has been tried without restoring connectivity.
recover_network() {
  # Kept local so the loop variable does not leak into the caller's scope.
  local recovery_method

  for recovery_method in \
    ensure_connection_manager_is_running \
    toggle_ethernet_interfaces \
    reload_usb_ethernet_devices \
    restart_connection_manager; do
    critical_msg "Attempting recovery method \"${recovery_method}\""
    # A success return from the recovery method implies that it successfully
    # performed some action that makes it worth re-checking to see whether
    # our connectivity was remediated. Otherwise, we move on to the next
    # recovery method without delay.
    "${recovery_method}" || continue
    # Give the network time to come back, then dump the interface state.
    sleep 30
    ifconfig -a

    if ping_controlling_server; then
      critical_msg "Recovery method \"${recovery_method}\" successful"
      return 0
    fi
  done
  return 1
}
| |
# Length of the window during which main keeps retrying recovery before it
# gives up: TIMEOUT_MINUTES is used in log messages, TIMEOUT is in seconds.
TIMEOUT_MINUTES=15
TIMEOUT=$(( 60 * TIMEOUT_MINUTES ))
| |
# Entry point: checks that the controlling autotest server is reachable over
# Ethernet and tries to restore connectivity when it is not.
#
# We guarantee a minimum of 12 minutes network timeout tolerance for tests
# that disrupt connectivity with the SSH connection from the autotest server.
# The retry window is TIMEOUT seconds (15 minutes) to make sure it can never
# fail before that SSH session does.
#
# Returns 0 once the server is reachable. When the window elapses first, a
# critical message is logged, disks are synced, a reboot is scheduled in the
# background and 1 is returned.
main() {
  local deadline=$(( $(date +%s) + TIMEOUT ))

  # The first check and recovery attempt happen right away.
  ping_controlling_server && return 0
  recover_network && return 0
  critical_msg "Restart failed; will retry recovery for ~$TIMEOUT_MINUTES minutes"

  # Keep retrying every 30 seconds until the deadline has passed.
  while (( $(date +%s) < deadline )); do
    sleep 30
    if ping_controlling_server; then
      critical_msg "Gateway now reachable; ending recovery loop"
      return 0
    fi
    recover_network && return 0
  done

  critical_msg "All efforts to recover ethernet have been exhausted. Rebooting."
  sync
  # Reboot from a background subshell so this function can still return.
  (sleep 5 && reboot) &
  return 1
}
| |
# Run the hook; the exit status of main becomes the script's exit status.
main "$@"