recover_duts: Reimplement as shell script In certain autotest failure cases /var gets wiped and with it the Python interpreter, in such a situation recover_duts.py can't be executed. The script is fairly simple and doesn't really benefit from being implemented in Python, reimplement it as shell script. BUG=chromium:882015 TEST=touch /mnt/stateful_partition/.labmachine rmmod r8152 # or applicable ethernet driver # wait up to 90s => eth0 is brought up again CQ-DEPEND=CL:1217082 Change-Id: I07a4610056c83e88f1b19e7e2f8b7767f81254e2 Signed-off-by: Matthias Kaehlcke <mka@chromium.org> Reviewed-on: https://chromium-review.googlesource.com/1213409 Reviewed-by: Douglas Anderson <dianders@chromium.org>

commit: ea94e325ebe45d445bd032b278daaa91e7cdacc4 [log] [tgz]
author: Matthias Kaehlcke <mka@chromium.org> Fri Sep 07 14:56:25 2018 -0700
committer: chrome-bot <chrome-bot@chromium.org> Mon Sep 17 13:17:28 2018 -0700
tree: a7a1a747f0af86cf970f8d80563976dd1a836788
parent: 584d04568c8b135e3b24fe3ff372bd6e22078c6e [diff]
diff --git a/recover_duts/README b/recover_duts/README
index c76a5b4..e955b75 100644
--- a/recover_duts/README
+++ b/recover_duts/README

@@ -2,8 +2,8 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-Recover DUTS is a Python Utility that runs on test images. It periodically runs
-a set of hooks to ensure that we can correctly connect / recover a bricked DUT
+Recover DUTS is a utility that runs on test images. It periodically runs a set
+of hooks to ensure that we can correctly connect / recover a bricked DUT
 without requiring manual recovery.
 
 In order to add a hook, add an executable script in the hooks/ dir that ends with

diff --git a/recover_duts/recover_duts.py b/recover_duts/recover_duts.py
deleted file mode 100755
index a43ea85..0000000
--- a/recover_duts/recover_duts.py
+++ /dev/null

@@ -1,83 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Recover duts.
-
-This module runs at system startup on Chromium OS test images. It runs through
-a set of hooks to keep a DUT from being bricked without manual intervention.
-Example hook:
-  Check to see if ethernet is connected. If its not, unload and reload the
-    ethernet driver.
-"""
-
-import logging
-import os
-import subprocess
-import time
-
-from logging import handlers
-
-LOGGING_SUBDIR = '/var/log/recover_duts'
-LOG_FILENAME = 'recover_duts.log'
-LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
-LONG_REBOOT_DELAY = 90
-SLEEP_DELAY = 60
-LOG_FILE_BACKUP_COUNT = 10
-LOG_FILE_SIZE = 1024 * 5000 # 5000 KB
-
-
-def _setup_logging(log_file):
-  """Setup logging.
-
-  Args:
-    log_file: path to log file.
-  """
-  log_formatter = logging.Formatter(LOGGING_FORMAT)
-  handler = handlers.RotatingFileHandler(
-      filename=log_file, maxBytes=LOG_FILE_SIZE,
-      backupCount=LOG_FILE_BACKUP_COUNT)
-  handler.setFormatter(log_formatter)
-  logger = logging.getLogger()
-  log_level = logging.DEBUG
-  logger.setLevel(log_level)
-  logger.addHandler(handler)
-
-def main():
-  if not os.path.isdir(LOGGING_SUBDIR):
-    os.makedirs(LOGGING_SUBDIR)
-
-  log_file = os.path.join(LOGGING_SUBDIR, LOG_FILENAME)
-  _setup_logging(log_file)
-  hooks_dir = os.path.join(os.path.dirname(__file__), 'hooks')
-
-  # Additional sleep as networking not be up in the case of a long reboot.
-  time.sleep(LONG_REBOOT_DELAY)
-  try:
-    while True:
-      for script in os.listdir(hooks_dir):
-        script = os.path.join(hooks_dir, script)
-        if os.path.isfile(script) and script.endswith('.hook'):
-          logging.debug('Running hook: %s', script)
-          popen = subprocess.Popen([script], stdout=subprocess.PIPE,
-                                   stderr=subprocess.STDOUT)
-          output = popen.communicate()[0]
-          if popen.returncode == 0:
-            logging.debug('Running of %s succeeded with output:\n%s', script,
-                          output)
-          else:
-            logging.warn('Running of %s failed with output:\n%s', script,
-                         output)
-      time.sleep(SLEEP_DELAY)
-
-  except Exception as e:
-    # Since this is run from an upstart job we want to ensure we log this into
-    # our log file before dying.
-    logging.fatal(str(e))
-    raise
-
-
-if __name__ == '__main__':
-  main()

diff --git a/recover_duts/recover_duts.sh b/recover_duts/recover_duts.sh
new file mode 100755
index 0000000..60c756c
--- /dev/null
+++ b/recover_duts/recover_duts.sh

@@ -0,0 +1,78 @@
+#!/bin/sh
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This script runs at system startup on Chromium OS test images. It runs through
+# a set of hooks to keep a DUT from being bricked without manual intervention.
+# Example hook:
+#   Check to see if ethernet is connected. If it's not, unload and reload the
+#   ethernet driver.
+
+LOG_DIR=/var/log/recover_duts
+LOG_FILE="${LOG_DIR}/recover_duts.log"
+LONG_REBOOT_DELAY=90
+SLEEP_DELAY=60
+
+_log() {
+  local prio="$1"
+  local msg="$2"
+
+  local ts="$(date +"%F %T,%N" | cut -b -23)"
+  echo "${ts} - ${prio} - ${msg}" >> "${LOG_FILE}"
+}
+
+# Write a record at DEBUG priority.
+#
+# Args:
+#   $1: message text.
+log_dbg() {
+  _log DEBUG "$1"
+}
+
+# Write a record at ERROR priority.
+#
+# Args:
+#   $1: message text.
+log_err() {
+  _log ERROR "$1"
+}
+
+# Run every *.hook file in the hooks/ directory next to this script, forever.
+#
+# Takes no arguments; if any are given, prints a usage line to stderr and
+# exits with status 1. After an initial sleep of LONG_REBOOT_DELAY, each pass
+# executes the hooks in glob order, logs their combined stdout/stderr together
+# with the outcome, and then sleeps SLEEP_DELAY before the next pass.
+main() {
+  local hooks_dir="$(dirname "$0")/hooks"
+  local script output ret
+
+  if [ $# -ne 0 ]; then
+    echo "Usage: $(basename "$0")" >&2
+    exit 1
+  fi
+
+  mkdir -p "${LOG_DIR}"
+
+  # Additional sleep as networking may not be up in the case of a long reboot.
+  sleep "${LONG_REBOOT_DELAY}"
+
+  while true; do
+    log_dbg "starting loop"
+
+    for script in "${hooks_dir}"/*.hook; do
+      # When nothing matches, the shell leaves the pattern unexpanded, so
+      # ${script} would be the literal ".../*.hook". Skip that, and anything
+      # else that is not a regular file, instead of trying to execute it.
+      [ -f "${script}" ] || continue
+
+      log_dbg "Running hook: ${script}"
+
+      # Capture stdout and stderr together; $? must be read immediately after
+      # the assignment to get the hook's exit status.
+      output="$("${script}" 2>&1)"
+      ret="$?"
+      if [ "${ret}" = "0" ]; then
+        if [ -z "${output}" ]; then
+          log_dbg "Running of ${script} succeeded"
+        else
+          log_dbg "Running of ${script} succeeded with output: ${output}"
+        fi
+      else
+        if [ -z "${output}" ]; then
+          log_err "Running of ${script} failed with no output (exit status: ${ret})"
+        else
+          log_err "Running of ${script} failed with output (exit status: ${ret}): ${output}"
+        fi
+      fi
+    done
+
+    sleep "${SLEEP_DELAY}"
+  done
+}
+
+main "$@"
commit	ea94e325ebe45d445bd032b278daaa91e7cdacc4	[log] [tgz]
author	Matthias Kaehlcke <mka@chromium.org>	Fri Sep 07 14:56:25 2018 -0700
committer	chrome-bot <chrome-bot@chromium.org>	Mon Sep 17 13:17:28 2018 -0700
tree	a7a1a747f0af86cf970f8d80563976dd1a836788
parent	584d04568c8b135e3b24fe3ff372bd6e22078c6e [diff]