recover_duts: Reimplement as shell script
In certain autotest failure cases /var gets wiped, and with it the
Python interpreter; in that situation recover_duts.py can't be
executed. The script is fairly simple and doesn't really benefit from
being implemented in Python, so reimplement it as a shell script.
BUG=chromium:882015
TEST=touch /mnt/stateful_partition/.labmachine
rmmod r8152 # or applicable ethernet driver
# wait up to 90s
=> eth0 is brought up again
CQ-DEPEND=CL:1217082
Change-Id: I07a4610056c83e88f1b19e7e2f8b7767f81254e2
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/1213409
Reviewed-by: Douglas Anderson <dianders@chromium.org>
diff --git a/recover_duts/README b/recover_duts/README
index c76a5b4..e955b75 100644
--- a/recover_duts/README
+++ b/recover_duts/README
@@ -2,8 +2,8 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-Recover DUTS is a Python Utility that runs on test images. It periodically runs
-a set of hooks to ensure that we can correctly connect / recover a bricked DUT
+Recover DUTS is a utility that runs on test images. It periodically runs a set
+of hooks to ensure that we can correctly connect / recover a bricked DUT
without requiring manual recovery.
In order to add a hook, add an executable script in the hooks/ dir that ends with
diff --git a/recover_duts/recover_duts.py b/recover_duts/recover_duts.py
deleted file mode 100755
index a43ea85..0000000
--- a/recover_duts/recover_duts.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Recover duts.
-
-This module runs at system startup on Chromium OS test images. It runs through
-a set of hooks to keep a DUT from being bricked without manual intervention.
-Example hook:
- Check to see if ethernet is connected. If its not, unload and reload the
- ethernet driver.
-"""
-
-import logging
-import os
-import subprocess
-import time
-
-from logging import handlers
-
-LOGGING_SUBDIR = '/var/log/recover_duts'
-LOG_FILENAME = 'recover_duts.log'
-LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
-LONG_REBOOT_DELAY = 90
-SLEEP_DELAY = 60
-LOG_FILE_BACKUP_COUNT = 10
-LOG_FILE_SIZE = 1024 * 5000 # 5000 KB
-
-
-def _setup_logging(log_file):
- """Setup logging.
-
- Args:
- log_file: path to log file.
- """
- log_formatter = logging.Formatter(LOGGING_FORMAT)
- handler = handlers.RotatingFileHandler(
- filename=log_file, maxBytes=LOG_FILE_SIZE,
- backupCount=LOG_FILE_BACKUP_COUNT)
- handler.setFormatter(log_formatter)
- logger = logging.getLogger()
- log_level = logging.DEBUG
- logger.setLevel(log_level)
- logger.addHandler(handler)
-
-def main():
- if not os.path.isdir(LOGGING_SUBDIR):
- os.makedirs(LOGGING_SUBDIR)
-
- log_file = os.path.join(LOGGING_SUBDIR, LOG_FILENAME)
- _setup_logging(log_file)
- hooks_dir = os.path.join(os.path.dirname(__file__), 'hooks')
-
- # Additional sleep as networking not be up in the case of a long reboot.
- time.sleep(LONG_REBOOT_DELAY)
- try:
- while True:
- for script in os.listdir(hooks_dir):
- script = os.path.join(hooks_dir, script)
- if os.path.isfile(script) and script.endswith('.hook'):
- logging.debug('Running hook: %s', script)
- popen = subprocess.Popen([script], stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT)
- output = popen.communicate()[0]
- if popen.returncode == 0:
- logging.debug('Running of %s succeeded with output:\n%s', script,
- output)
- else:
- logging.warn('Running of %s failed with output:\n%s', script,
- output)
- time.sleep(SLEEP_DELAY)
-
- except Exception as e:
- # Since this is run from an upstart job we want to ensure we log this into
- # our log file before dying.
- logging.fatal(str(e))
- raise
-
-
-if __name__ == '__main__':
- main()
diff --git a/recover_duts/recover_duts.sh b/recover_duts/recover_duts.sh
new file mode 100755
index 0000000..60c756c
--- /dev/null
+++ b/recover_duts/recover_duts.sh
@@ -0,0 +1,78 @@
+#!/bin/sh
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This script runs at system startup on Chromium OS test images. It runs through
+# a set of hooks to keep a DUT from being bricked without manual intervention.
+# Example hook:
+#   Check to see if ethernet is connected. If it's not, unload and reload the
+#   ethernet driver.
+
+LOG_DIR=/var/log/recover_duts
+LOG_FILE="${LOG_DIR}/recover_duts.log"
+LONG_REBOOT_DELAY=90  # Seconds to sleep once at startup, before the first pass.
+SLEEP_DELAY=60  # Seconds to sleep after each pass over the hooks.
+
+# Append "<timestamp> - <prio> - <msg>" to LOG_FILE. $1: priority, $2: message.
+_log() {
+  local prio="$1" msg="$2" ts
+  # Timestamp is cut to 23 bytes, i.e. "YYYY-MM-DD HH:MM:SS,mmm".
+  ts="$(date +"%F %T,%N" | cut -b -23)"
+  # printf, not echo: a /bin/sh echo may mangle backslashes in the message.
+  printf '%s - %s - %s\n' "${ts}" "${prio}" "${msg}" >> "${LOG_FILE}"
+}
+
+# Log the message in $1 to the log file with DEBUG priority.
+log_dbg() {
+  local text="$1"
+  _log DEBUG "${text}"
+}
+
+# Log the message in $1 to the log file with ERROR priority.
+log_err() {
+  local text="$1"
+  _log ERROR "${text}"
+}
+
+main() {
+ local hooks_dir="$(dirname "$0")/hooks"
+ local script output ret
+
+ if [ $# -ne 0 ]; then
+ echo "Usage: $(basename "$0")" >&2
+ exit 1
+ fi
+
+ mkdir -p "${LOG_DIR}"
+
+ # Additional sleep as networking not be up in the case of a long reboot.
+ sleep "${LONG_REBOOT_DELAY}"
+
+ while true; do
+ log_dbg "starting loop"
+
+ for script in "${hooks_dir}"/*.hook; do
+ log_dbg "Running hook: ${script}"
+
+ output="$("${script}" 2>&1)"
+ ret="$?"
+ if [ "${ret}" = "0" ]; then
+ if [ -z "${output}" ]; then
+ log_dbg "Running of ${script} succeeded"
+ else
+ log_dbg "Running of ${script} succeeded with output: ${output}"
+ fi
+ else
+ if [ -z "${output}" ]; then
+ log_err "Running of ${script} failed with no output (exit status: ${ret})"
+ else
+ log_err "Running of ${script} failed with output (exit status: ${ret}): ${output}"
+ fi
+ fi
+ done
+
+ sleep "${SLEEP_DELAY}"
+ done
+}
+
+main "$@"