recover_duts: ping SSH clients to check connectivity

recover_duts' check_ethernet.hook tests for network connectivity by
pinging the system's default gateway through every device whose name
starts with "eth". This is obviously a problem when the default route
does not use such a device, as can happen during some autotests (like
power_LoadTest) that use the cros/ script to switch their
default route to wireless.

What we really want to know is if the autotest server is still reachable
and can control the device. Autotest uses an SSH connection to control
its client, so this patch changes recover_duts to search for an active
SSH connection and ping that remote client first. This works with
backchannel tests because backchannel explicitly keeps routes to subnets
that have active SSH connections on the wired device. If no active SSH
connection is detected, the hook will fall back to the old method.

TEST=Submit this patch and rejoice as all the power_LoadTest runs in the
lab magically start to work again for the first time in months.

Change-Id: I0066ccebe7a2f77744f2104555160c08feb7c108
Signed-off-by: Julius Werner <>
Reviewed-by: Chris Sosa <>
diff --git a/recover_duts/hooks/check_ethernet.hook b/recover_duts/hooks/check_ethernet.hook
index 389b0b2..b581f8d 100755
--- a/recover_duts/hooks/check_ethernet.hook
+++ b/recover_duts/hooks/check_ethernet.hook
@@ -50,13 +50,14 @@
-# Pings the given ipaddress through the given ethernet device.
-# $1 - The ethernet device to ping through.
-# $2 - IP address to ping.
+# Pings the given ipaddress through all wired ethernet devices
+# $1 - IP address to ping.
 do_ping() {
-  local eth=$1
-  local ip_addr=$2
-  ping -I ${eth} -c 1 ${ip_addr}
+  local ip_addr=$1
+  for eth in $(find_ethernet_interfaces); do
+    ping -I ${eth} -c 3 ${ip_addr} && return 0
+  done
+  return 1
 # Restart all our ethernet devices and restart shill.
@@ -71,33 +72,38 @@
   sleep 30
-# Loop through all ethernet devices and see if we can connect to our default
-# gateway.
-ping_default_gateway_over_ethernet() {
-  local eth default_gateway
-  default_gateway="$(get_default_gateway)" || default_gateway=
-  if [ -n "${default_gateway}" ]; then
-    for eth in $(find_ethernet_interfaces); do
-      if do_ping ${eth} ${default_gateway}; then
-        return 0
-      fi
-    done
+# Return the remote IP address of the first established SSH connection
+find_ssh_client() {
+  netstat -lanp | awk '/tcp.*:22.*ESTABLISHED.*/ {split($5,a,":"); print a[1]}'
+# Try to find a connected SSH client (our autotest server) and ping it
+ping_controlling_server() {
+  local ssh_client default_gateway
+  ssh_client="$(find_ssh_client)" || ssh_client=
+  if [ -n "${ssh_client}" ]; then
+    do_ping ${ssh_client} && return 0
+  else
+    default_gateway="$(get_default_gateway)" || default_gateway=
+    if [ -n "${default_gateway}" ]; then
+      do_ping ${default_gateway} && return 0
+    fi
   return 1
 main() {
-  # Attempt to ping our default gateway over ethernet.
-  if ping_default_gateway_over_ethernet; then
+  # Attempt to ping our controlling autotest server over ethernet.
+  if ping_controlling_server; then
     return 0
-  # We can't reach our default gateway through any ethernet devices.
+  # We can't reach our controlling server through any ethernet devices.
   # Attempt to ping again. If successful, return 1 so that way log the fact
   # that we need to take action to recover the dut.
-  if ping_default_gateway_over_ethernet; then
+  if ping_controlling_server; then
     return 1