recover_duts: Reimplement as shell script

In certain autotest failure cases /var gets wiped, and with it the
Python interpreter. In such a situation recover_duts.py can't be
executed. The script is fairly simple and doesn't really benefit
from being implemented in Python, so reimplement it as a shell script.

BUG=chromium:882015
TEST=touch /mnt/stateful_partition/.labmachine
  rmmod r8152 # or applicable ethernet driver
  # wait up to 90s
    => eth0 is brought up again
CQ-DEPEND=CL:1217082

Change-Id: I07a4610056c83e88f1b19e7e2f8b7767f81254e2
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/1213409
Reviewed-by: Douglas Anderson <dianders@chromium.org>
diff --git a/recover_duts/README b/recover_duts/README
index c76a5b4..e955b75 100644
--- a/recover_duts/README
+++ b/recover_duts/README
@@ -2,8 +2,8 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-Recover DUTS is a Python Utility that runs on test images. It periodically runs
-a set of hooks to ensure that we can correctly connect / recover a bricked DUT
+Recover DUTS is a utility that runs on test images. It periodically runs a set
+of hooks to ensure that we can correctly connect / recover a bricked DUT
 without requiring manual recovery.
 
 In order to add a hook, add an executable script in the hooks/ dir that ends with
diff --git a/recover_duts/recover_duts.py b/recover_duts/recover_duts.py
deleted file mode 100755
index a43ea85..0000000
--- a/recover_duts/recover_duts.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Recover duts.
-
-This module runs at system startup on Chromium OS test images. It runs through
-a set of hooks to keep a DUT from being bricked without manual intervention.
-Example hook:
-  Check to see if ethernet is connected. If its not, unload and reload the
-    ethernet driver.
-"""
-
-import logging
-import os
-import subprocess
-import time
-
-from logging import handlers
-
-LOGGING_SUBDIR = '/var/log/recover_duts'
-LOG_FILENAME = 'recover_duts.log'
-LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
-LONG_REBOOT_DELAY = 90
-SLEEP_DELAY = 60
-LOG_FILE_BACKUP_COUNT = 10
-LOG_FILE_SIZE = 1024 * 5000 # 5000 KB
-
-
-def _setup_logging(log_file):
-  """Setup logging.
-
-  Args:
-    log_file: path to log file.
-  """
-  log_formatter = logging.Formatter(LOGGING_FORMAT)
-  handler = handlers.RotatingFileHandler(
-      filename=log_file, maxBytes=LOG_FILE_SIZE,
-      backupCount=LOG_FILE_BACKUP_COUNT)
-  handler.setFormatter(log_formatter)
-  logger = logging.getLogger()
-  log_level = logging.DEBUG
-  logger.setLevel(log_level)
-  logger.addHandler(handler)
-
-def main():
-  if not os.path.isdir(LOGGING_SUBDIR):
-    os.makedirs(LOGGING_SUBDIR)
-
-  log_file = os.path.join(LOGGING_SUBDIR, LOG_FILENAME)
-  _setup_logging(log_file)
-  hooks_dir = os.path.join(os.path.dirname(__file__), 'hooks')
-
-  # Additional sleep as networking not be up in the case of a long reboot.
-  time.sleep(LONG_REBOOT_DELAY)
-  try:
-    while True:
-      for script in os.listdir(hooks_dir):
-        script = os.path.join(hooks_dir, script)
-        if os.path.isfile(script) and script.endswith('.hook'):
-          logging.debug('Running hook: %s', script)
-          popen = subprocess.Popen([script], stdout=subprocess.PIPE,
-                                   stderr=subprocess.STDOUT)
-          output = popen.communicate()[0]
-          if popen.returncode == 0:
-            logging.debug('Running of %s succeeded with output:\n%s', script,
-                          output)
-          else:
-            logging.warn('Running of %s failed with output:\n%s', script,
-                         output)
-      time.sleep(SLEEP_DELAY)
-
-  except Exception as e:
-    # Since this is run from an upstart job we want to ensure we log this into
-    # our log file before dying.
-    logging.fatal(str(e))
-    raise
-
-
-if __name__ == '__main__':
-  main()
diff --git a/recover_duts/recover_duts.sh b/recover_duts/recover_duts.sh
new file mode 100755
index 0000000..60c756c
--- /dev/null
+++ b/recover_duts/recover_duts.sh
@@ -0,0 +1,98 @@
+#!/bin/sh
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This module runs at system startup on Chromium OS test images. It runs through
+# a set of hooks to keep a DUT from being bricked without manual intervention.
+# Example hook:
+#   Check to see if ethernet is connected. If it's not, unload and reload the
+#   ethernet driver.
+
+LOG_DIR=/var/log/recover_duts
+LOG_FILE="${LOG_DIR}/recover_duts.log"
+# Seconds to wait before the first pass over the hooks.
+LONG_REBOOT_DELAY=90
+# Seconds to wait between two passes over the hooks.
+SLEEP_DELAY=60
+
+# Appends one "<timestamp> - <priority> - <message>" line to LOG_FILE.
+#   $1: priority label (e.g. DEBUG, ERROR)
+#   $2: message text
+_log() {
+  local prio="$1"
+  local msg="$2"
+  local ts
+
+  # Keep 23 bytes so the nanoseconds (%N) are cut down to milliseconds.
+  ts="$(date +"%F %T,%N" | cut -b -23)"
+  # printf rather than echo: some sh implementations expand backslash escapes
+  # in echo arguments, which would mangle hook output embedded in the message.
+  printf '%s - %s - %s\n' "${ts}" "${prio}" "${msg}" >> "${LOG_FILE}"
+}
+
+# Logs a message with DEBUG priority.
+#   $1: message text
+log_dbg() {
+  local msg="$1"
+
+  _log DEBUG "${msg}"
+}
+
+# Logs a message with ERROR priority.
+#   $1: message text
+log_err() {
+  local msg="$1"
+
+  _log ERROR "${msg}"
+}
+
+# Entry point; takes no arguments. After an initial delay, runs every regular
+# file matching hooks/*.hook next to this script, logs each result, sleeps,
+# and repeats forever.
+main() {
+  local hooks_dir script output ret
+
+  if [ $# -ne 0 ]; then
+    echo "Usage: $(basename "$0")" >&2
+    exit 1
+  fi
+
+  hooks_dir="$(dirname "$0")/hooks"
+  mkdir -p "${LOG_DIR}"
+
+  # Additional sleep as networking may not be up in the case of a long reboot.
+  sleep "${LONG_REBOOT_DELAY}"
+
+  while true; do
+    log_dbg "starting loop"
+
+    for script in "${hooks_dir}"/*.hook; do
+      # An unmatched glob stays a literal pattern; skip it, along with
+      # anything else that is not a regular file.
+      [ -f "${script}" ] || continue
+
+      log_dbg "Running hook: ${script}"
+
+      output="$("${script}" 2>&1)"
+      ret="$?"
+      if [ "${ret}" = "0" ]; then
+        if [ -z "${output}" ]; then
+          log_dbg "Running of ${script} succeeded"
+        else
+          log_dbg "Running of ${script} succeeded with output: ${output}"
+        fi
+      else
+        if [ -z "${output}" ]; then
+          log_err "Running of ${script} failed with no output (exit status: ${ret})"
+        else
+          log_err "Running of ${script} failed with output (exit status: ${ret}): ${output}"
+        fi
+      fi
+    done
+
+    sleep "${SLEEP_DELAY}"
+  done
+}
+
+main "$@"