blob: dd842cba29a5ffa8d156f3199fd3839471bb8a3a [file] [log] [blame]
#!/usr/bin/env python
import collections
import multiprocessing.pool
import subprocess
import sys
import common
from autotest_lib.server import frontend
from autotest_lib.server import utils
# Names of the AFE servers that main() queries for servo hosts.
SERVERS = ['cautotest', 'cautotest-cq']
def ssh_command(host, cmd):
    """Build the argv list that runs `cmd` as root on `host` over ssh.

    The ssh options disable password authentication and strict host key
    checking, set ConnectTimeout=1 and ConnectionAttempts=1, and enable
    quiet mode (-q).

    Args:
        host: hostname to connect to; it is prefixed with 'root@'.
        cmd: remote command line, passed as a single argument after '--'.

    Returns:
        A list of strings suitable for the subprocess functions.
    """
    ssh_options = [
        '-o PasswordAuthentication=no',
        '-o ConnectTimeout=1',
        '-o ConnectionAttempts=1',
        '-o StrictHostKeyChecking=no',
        '-q',
    ]
    target = 'root@'+host
    return ['ssh'] + ssh_options + [target, '--', cmd]
class Result(object):
    """Mutable record of the probes run against a single servo host.

    Attributes (as filled in by check_servo(); None means "not probed"):
        host: hostname of the servo that was examined.
        ping: whether the ping probe succeeded.
        ssh: whether a trivial ssh command succeeded.
        servod: whether `pgrep servod` produced any output.
        logs: output of tailing the servod log, or "" if that failed.
    """

    def __init__(self, host, ping, ssh, servod, logs):
        """Store the given probe outcomes unchanged on the instance."""
        (self.host, self.ping, self.ssh,
         self.servod, self.logs) = (host, ping, ssh, servod, logs)
def log_result(result):
    """Print a human-readable summary of one servo's probe results.

    Reporting stops at the first failed connectivity check: a servo that is
    down for ping gets no SSH line, and one that is down for SSH gets no
    servod status or logs.

    Args:
        result: object exposing host, ping, ssh, servod and logs attributes.
    """
    print("Examining %s ..." % result.host)

    # Guard clauses: bail out as soon as the host is known to be unreachable.
    if not result.ping:
        print(" PING = DOWN\n")
        return
    print(" PING = UP")

    if not result.ssh:
        print(" SSH = DOWN\n")
        return
    print(" SSH = UP")

    servod_state = 'UP' if result.servod else 'DOWN'
    print(" SERVOD = %s" % (servod_state,))
    print(" LOGS = \n%s" % (result.logs,))
def check_servo(servo):
    """Probe one servo host and report what is and is not working.

    The probes run in order (ping, ssh, servod process, servod log) and the
    function returns early, leaving later fields as None, once ping or ssh
    fails.

    Args:
        servo: hostname of the servo to examine.

    Returns:
        A Result describing the probes that were run.
    """
    status = Result(servo, None, None, None, None)

    # utils.ping() returning 0 is treated as "host answered".
    status.ping = (utils.ping(servo, tries=5, deadline=5) == 0)
    if not status.ping:
        return status

    # A no-op remote command tells us whether ssh login works at all.
    try:
        subprocess.check_output(ssh_command(servo, "true"))
        status.ssh = True
    except subprocess.CalledProcessError:
        status.ssh = False
        return status

    # A non-zero pgrep exit raises CalledProcessError and counts as "down".
    try:
        pids = subprocess.check_output(ssh_command(servo, "pgrep servod"))
        status.servod = (pids != "")
    except subprocess.CalledProcessError:
        status.servod = False

    # Keep the tail of the servod log for diagnosis; empty if unreadable.
    try:
        status.logs = subprocess.check_output(
            ssh_command(servo, "tail /var/log/servod.log"))
    except subprocess.CalledProcessError:
        status.logs = ""

    return status
def redeploy_hdctools(host):
    """Run the hdctools beaglebone deploy script on `host` over ssh.

    Args:
        host: hostname of the servo host to redeploy on.

    Returns:
        True if the remote command exited successfully, False if it raised
        subprocess.CalledProcessError.
    """
    deploy_cmd = ssh_command(
        host, "/home/chromeos-test/hdctools/beaglebone/deploy")
    try:
        # stderr is merged into stdout; the captured output is discarded.
        subprocess.check_output(deploy_cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        return False
    return True
def install_package(package):
    """Create a fix function that installs `package` and starts servod.

    Args:
        package: package name interpolated into `apt-get install`.

    Returns:
        A function taking a hostname. It runs `apt-get install <package>`
        and then `start servod` on that host over ssh, returning True if
        both succeed and False as soon as either fails. Its __name__ is set
        to 'install_package(<package>)' so it identifies itself in reports.
    """
    def installer(host):
        # NOTE(review): apt-get is invoked without -y; confirm it never
        # needs to prompt when run over a non-interactive ssh session.
        try:
            for remote_cmd in ("apt-get install %s" % package,
                               "start servod"):
                subprocess.check_output(
                    ssh_command(host, remote_cmd),
                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError:
            return False
        return True

    installer.__name__ = "install_package(%s)" % package
    return installer
def manual_intervention(reason):
    """Create a placeholder fix for problems that need a human.

    Args:
        reason: short description of why manual work is required.

    Returns:
        A function that ignores its single argument and always returns
        False. Its __name__ is set to 'MANUAL(<reason>)' so the reason
        shows up wherever the fix's name is reported.
    """
    def needs_human(_):
        # Nothing can be done automatically, so the "fix" never succeeds.
        return False

    needs_human.__name__ = 'MANUAL(%s)' % reason
    return needs_human
# Outcome of one repair attempt: the host it targeted, the name of the fix
# function that was applied, and whether that function reported success.
Fix = collections.namedtuple('Fix', ['host', 'method', 'success'])
# It is not yet known whether these failures are one-time or recurring, so the
# fixes live here for now. If this script ends up seeing and fixing them
# frequently, this code should be moved into servo_host's repair().
def diagnose_failure(r):
    """Pick a fix for a probed servo, apply it, and report the outcome.

    Conditions are tested from highest to lowest priority, so when several
    apply only the first one listed here is acted on: servo not found by
    the beaglebone, USB drive not seen, host unreachable by ping/ssh,
    missing `serial` module, and finally a broken entry point.

    Args:
        r: object exposing host, ping, ssh and logs attributes.

    Returns:
        A Fix naming the method that was run and whether it succeeded, or
        None if no known failure was recognised.
    """
    logs = r.logs

    def logged(text):
        # Missing or empty logs never match anything.
        return bool(logs) and text in logs

    if logged('discover_servo - No servos found'):
        method = manual_intervention("beaglebone doesn't see servo")
    elif logged('No usb device connected to servo'):
        method = manual_intervention("servo doesn't see USB drive")
    elif not (r.ping and r.ssh):
        method = manual_intervention('servo is unresponsive on network')
    elif logged('ImportError: No module named serial'):
        method = install_package('python-serial')
    elif logged('ImportError: Entry point'):
        method = redeploy_hdctools
    else:
        return None

    return Fix(r.host, method.__name__, method(r.host))
def main():
    """Examine every repair-failed servo host and attempt known fixes.

    For each server in SERVERS, hosts labelled 'servo' with status
    'Repair Failed' are looked up, their '<hostname>-servo.cros'
    counterparts are probed on a thread pool, and each result is printed.
    Afterwards every result is run through diagnose_failure() and the
    outcome of each attempted fix is printed.

    Returns:
        0, for use as the process exit status.
    """
    workers = multiprocessing.pool.ThreadPool()
    examined = []

    for afe_server in SERVERS:
        afe = frontend.AFE(server=afe_server)
        failed_hosts = afe.run('get_hosts', multiple_labels=['servo'],
                               status='Repair Failed')
        servo_names = [entry['hostname']+'-servo.cros'
                       for entry in failed_hosts]
        for outcome in workers.imap_unordered(check_servo, servo_names):
            log_result(outcome)
            examined.append(outcome)

    # fix 'em if you can?  Results with no recognised failure yield None
    # and are dropped here.
    attempted = [attempt for attempt
                 in workers.imap_unordered(diagnose_failure, examined)
                 if attempt]
    for fix in attempted:
        verdict = 'SUCCESS' if fix.success else 'FAILURE'
        report = {'host': fix.host, 'method': fix.method, 'success': verdict}
        print("Fixing %(host)s via %(method)s resulted in %(success)s" %
              report)
    return 0
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)