blob: 37bc8723e0ccfcca9d0cc0d666d32d5df15d56a1 [file] [log] [blame]
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json
import logging
import os
import re
import signal
import socket
import time
import urllib2
from autotest_lib.client.common_lib import base_utils, error, global_config
from autotest_lib.client.cros import constants
# Number of seconds nuke_pids() sleeps after sending each signal before it
# moves on to the next signal in its queue.
CHECK_PID_IS_ALIVE_TIMEOUT = 6
# Host names/addresses that externalize_host() treats as the local machine.
_LOCAL_HOST_LIST = ('localhost', '127.0.0.1')
# Values of 'general_state' in the lab status response for which
# get_lab_status() reports the lab as up.
LAB_GOOD_STATES = ('open', 'throttled')
class ParseBuildNameException(Exception):
    """Signals that ParseBuildName() was given a name it cannot parse."""
def ParseBuildName(name):
    """Split a build name into board, type, milestone, and manifest number.

    Matching is anchored at the start of |name| only, so any text following
    the manifest number is ignored.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0'

    @return a 4-tuple of strings (board, type, milestone, manifest):
            board: board the manifest is for, e.g. 'x86-alex'.
            type: build type, e.g. 'release'.
            milestone: digits of the milestone, e.g. '20'.
            manifest: manifest number, e.g. '2015.0.0'.

    @raise ParseBuildNameException: if |name| does not begin with text of
            the form <board>-<type>/R<milestone>-<manifest>.
    """
    parsed = re.match(r'([\w-]+)-(\w+)/R(\d+)-([\d.ab-]+)', name)
    if parsed is None:
        raise ParseBuildNameException('%s is a malformed build name.' % name)
    # The pattern has exactly four groups, so a match always yields four
    # components.
    return parsed.groups()
def ping(host, deadline=None, tries=None, timeout=60):
    """Run the 'ping' command against |host| and report its exit code.

    The command is run through base_utils.run() with |timeout| as its
    timeout and with a non-zero exit status tolerated.

    Per 'man ping', if you specify BOTH |deadline| and |tries|, ping only
    returns 0 if we get responses to |tries| pings within |deadline| seconds.
    Specifying |deadline| or |tries| alone should return 0 as long as
    some packets receive responses.

    @param host: the host to ping.
    @param deadline: seconds within which |tries| pings must succeed;
                     passed to ping as -w. Omitted when falsy.
    @param tries: number of pings to send; passed to ping as -c. Omitted
                  when falsy.
    @param timeout: number of seconds after which to kill 'ping' command.

    @return exit code of ping command.
    """
    deadline_opt = ['-w%d' % deadline] if deadline else []
    count_opt = ['-c%d' % tries] if tries else []
    outcome = base_utils.run('ping',
                             args=[host] + deadline_opt + count_opt,
                             ignore_status=True,
                             timeout=timeout,
                             stdout_tee=base_utils.TEE_TO_LOGS,
                             stderr_tee=base_utils.TEE_TO_LOGS)
    return outcome.exit_status
def host_is_in_lab_zone(hostname):
    """Check if the host is in the CROS.dns_zone.

    Only the first dot-separated label of |hostname| is used; it is joined
    with the configured zone and the resulting name is looked up.

    @param hostname: The hostname to check.

    @returns True if <short hostname>.<dns_zone> resolves. False if it does
             not resolve, or if no CROS.dns_zone is configured.
    """
    dns_zone = global_config.global_config.get_config_value('CROS', 'dns_zone',
                                                            default=None)
    if not dns_zone:
        # Without a zone the lookup below would be for the literal name
        # '<host>.None' (or '<host>.'), which says nothing about lab
        # membership and could even resolve by accident.
        return False
    short_name = hostname.split('.')[0]
    fqdn = '%s.%s' % (short_name, dns_zone)
    try:
        socket.gethostbyname(fqdn)
    except socket.gaierror:
        return False
    return True
def get_chrome_version(job_views):
    """
    Look up the chrome version recorded in a job's test views.

    The views are scanned in order, and the value stored under
    constants.CHROME_VERSION in the first view whose 'attributes' mapping
    contains that key is returned. Views without attributes (or with empty
    attributes) are skipped.

    This method cannot retrieve the chrome version from a dictionary that
    does not conform to the structure of an autotest tko view.

    @param job_views: a list of a job's result views, as returned by
                      the get_detailed_test_views method in rpc_interface.
                      May be empty or None (e.g. for aborted jobs).

    @return: The chrome version string, or None if one can't be found.
    """
    # Aborted jobs have no views.
    if not job_views:
        return None

    for view in job_views:
        attributes = view.get('attributes')
        if not attributes:
            continue
        if constants.CHROME_VERSION in attributes:
            return attributes.get(constants.CHROME_VERSION)

    logging.warning('Could not find chrome version for failure.')
    return None
def get_current_board():
    """Read the board name from /etc/lsb-release.

    Returns the value of the first CHROMEOS_RELEASE_BOARD=<value> line.
    Errors raised while opening the file are not caught here.

    @return current board name, e.g "lumpy", or None if the file has no
            CHROMEOS_RELEASE_BOARD line.
    """
    board_line = re.compile(r'^CHROMEOS_RELEASE_BOARD=(.+)$')
    with open('/etc/lsb-release') as lsb_release_file:
        for entry in lsb_release_file:
            found = board_line.match(entry)
            if found is not None:
                return found.group(1)
    return None
# TODO(petermayo): crosbug.com/31826 Share this with _GsUpload in
# //chromite.git/buildbot/prebuilt.py somewhere/somehow
def gs_upload(local_file, remote_file, acl, result_dir=None,
              transfer_timeout=300, acl_timeout=300):
    """Copy a local file to a Google Storage location using gsutil.

    If |acl| is one of the canned ACL names it is passed straight to
    'gsutil cp -a'. Otherwise |acl| is treated as the path of an ACL file:
    the file is uploaded as 'private' and 'gsutil setacl' is then run with
    that file against the uploaded object.

    @param local_file: Local file to upload
    @param remote_file: Remote location to upload the local_file to.
    @param acl: name or file used for controlling access to the uploaded
                file.
    @param result_dir: Result directory if you want to add tracing to the
                       upload. When set, gsutil output is written to a
                       file named 'tracing' inside it.
    @param transfer_timeout: Timeout for this upload call.
    @param acl_timeout: Timeout for the acl call needed to confirm that
                        the uploader has permissions to execute the upload.

    @raise CmdError: the exit code of the gsutil call was not 0
                     (presumably raised by base_utils.run).

    @returns False if |acl| is neither a canned ACL nor an existing file;
             True once the gsutil command(s) have been run.
    """
    # https://developers.google.com/storage/docs/accesscontrol#extension
    canned_acls = ('project-private', 'private', 'public-read',
                   'public-read-write', 'authenticated-read',
                   'bucket-owner-read', 'bucket-owner-full-control')
    gsutil = 'gsutil'

    if acl in canned_acls:
        upload_cmd = '%s cp -a %s %s %s' % (gsutil, acl, local_file,
                                            remote_file)
        set_acl_cmd = None
    else:
        # For private uploads we assume that the overlay board is set up
        # properly and a googlestore_acl.xml is present, if not this script
        # errors
        upload_cmd = '%s cp -a private %s %s' % (gsutil, local_file,
                                                 remote_file)
        if not os.path.exists(acl):
            logging.error('Unable to find ACL File %s.', acl)
            return False
        set_acl_cmd = '%s setacl %s %s' % (gsutil, acl, remote_file)

    if result_dir:
        trace_path = os.path.join(result_dir, 'tracing')
        with open(trace_path, 'w') as ftrace:
            ftrace.write('Preamble\n')
            base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True,
                           stdout_tee=ftrace, stderr_tee=ftrace)
            if set_acl_cmd:
                ftrace.write('\nACL setting\n')
                # Apply the passed in ACL xml file to the uploaded object.
                base_utils.run(set_acl_cmd, timeout=acl_timeout, verbose=True,
                               stdout_tee=ftrace, stderr_tee=ftrace)
            ftrace.write('Postamble\n')
        return True

    # No tracing requested: run the same commands without teeing output.
    base_utils.run(upload_cmd, timeout=transfer_timeout, verbose=True)
    if set_acl_cmd:
        base_utils.run(set_acl_cmd, timeout=acl_timeout, verbose=True)
    return True
def gs_ls(uri_pattern):
    """List the Google Storage URIs matching a pattern via 'gsutil ls'.

    @param uri_pattern: a GS URI pattern, may contain wildcards

    @return A list of URIs matching the given pattern, one per non-empty
            line of the command output with trailing whitespace removed.

    @raise CmdError: the gsutil command failed.
    """
    listing = base_utils.system_output(' '.join(('gsutil', 'ls', uri_pattern)))
    uris = []
    for entry in listing.splitlines():
        if entry:
            uris.append(entry.rstrip())
    return uris
def nuke_pids(pid_list, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """
    Given a list of pids, kill them via an escalating series of signals.

    Each signal in |signal_queue| is sent to every pid in turn, followed by
    a pause of CHECK_PID_IS_ALIVE_TIMEOUT seconds before the next signal.
    If SIGKILL is one of the signals, no check for survivors is done.

    @param pid_list: List of PIDs to kill.
    @param signal_queue: Sequence of signals to send the PIDs to terminate
                         them, in the order they should be tried.

    @raise error.AutoservRunError: if SIGKILL was not in |signal_queue| and
            at least one pid is still alive after all signals were sent.
    """
    for sig in signal_queue:
        logging.debug('Sending signal %s to the following pids:', sig)
        for pid in pid_list:
            logging.debug('Pid %d', pid)
            try:
                os.kill(pid, sig)
            except OSError:
                # The process may have died from a previous signal before we
                # could kill it.
                pass
        time.sleep(CHECK_PID_IS_ALIVE_TIMEOUT)

    if signal.SIGKILL in signal_queue:
        return

    # Both values must be passed to the format operator as one tuple;
    # formatting with the pid alone raises TypeError instead of reporting
    # the surviving process.
    failed_list = [
        'Could not kill %d for process name: %s.' %
        (pid, base_utils.get_process_name(pid))
        for pid in pid_list if base_utils.pid_is_alive(pid)]
    if failed_list:
        raise error.AutoservRunError('Following errors occured: %s' %
                                     failed_list, None)
def externalize_host(host):
    """Map a local-only host name to this machine's host name.

    @param host: a host name or address (string)

    @return socket.gethostname() if |host| is one of _LOCAL_HOST_LIST,
            otherwise |host| unchanged.
    """
    if host in _LOCAL_HOST_LIST:
        return socket.gethostname()
    return host
def get_lab_status():
    """Grabs the current lab status and message.

    Fetches the URL configured as CROS.lab_status_url and decodes the JSON
    body, which is expected to carry 'general_state' and 'message' keys.
    Up to 5 attempts are made, one second apart. If none of them yields an
    HTTP 200 response, the lab is reported as up with an empty message.

    @returns a dict with keys 'lab_is_up' and 'message'. lab_is_up points
             to a boolean and message points to a string.
    """
    result = {'lab_is_up': True, 'message': ''}
    status_url = global_config.global_config.get_config_value(
            'CROS', 'lab_status_url')
    max_attempts = 5
    retry_waittime = 1
    for _ in range(max_attempts):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occured when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        try:
            # Check for successful response code.
            if response.getcode() == 200:
                data = json.load(response)
                result['lab_is_up'] = data['general_state'] in LAB_GOOD_STATES
                result['message'] = data['message']
                return result
        finally:
            # Release the connection on every path, including retries.
            response.close()
        time.sleep(retry_waittime)
    # We go ahead and say the lab is open if we can't get the status.
    logging.warning('Could not get a status from %s', status_url)
    return result
def check_lab_status(board=None):
    """Check if the lab is up and if we can schedule suites to run.

    Also checks if the lab is disabled for that particular board, and if so
    will raise an error to prevent new suites from being scheduled for that
    board.

    The check is skipped entirely unless the configured SERVER.hostname
    starts with 'cautotest'.

    @param board: board name that we want to check the status of.

    @raises error.LabIsDownException if the lab is not up.
    @raises error.BoardIsDisabledException if the desired board is currently
            disabled.
    """
    # Ensure we are trying to schedule on the actual lab.
    server_hostname = global_config.global_config.get_config_value(
            'SERVER', 'hostname')
    if not server_hostname.startswith('cautotest'):
        return

    # First check if the lab is up.
    lab_status = get_lab_status()
    if not lab_status['lab_is_up']:
        raise error.LabIsDownException('Chromium OS Lab is currently not up: '
                                       '%s.' % lab_status['message'])

    if not board:
        return

    # Check if the board we wish to use is disabled.
    # Lab messages should be in the format of:
    # Lab is 'status' [boards not to be ran] (comment). Example:
    # Lab is Open [stumpy, kiev, x86-alex] (power_resume rtc causing duts to go
    # down)
    # The match is non-greedy so that only the first bracketed list is used,
    # even if the trailing comment contains brackets of its own.
    disabled_match = re.search(r'\[(.*?)\]', lab_status['message'])
    if not disabled_match:
        return

    # Compare against whole board names; a plain substring test would treat
    # e.g. 'alex' as disabled whenever 'x86-alex' is listed.
    disabled_boards = [name for name in
                       re.split(r'[,\s]+', disabled_match.group(1)) if name]
    if board in disabled_boards:
        raise error.BoardIsDisabledException('Chromium OS Lab is '
                'currently not allowing suites to be scheduled on board '
                '%s: %s' % (board, lab_status['message']))
def urlopen_socket_timeout(url, data=None, timeout=5):
    """
    Wrapper to urllib2.urlopen with a socket timeout.

    This method will convert all socket timeouts to
    TimeoutExceptions, so we can use it in conjunction
    with the rpc retry decorator and continue to handle
    other URLErrors as we see fit.

    The timeout is applied by temporarily changing the process-wide default
    socket timeout, which is restored before returning or raising.

    @param url: The url to open.
    @param data: The data to send to the url (eg: the urlencoded dictionary
                 used with a POST call).
    @param timeout: The timeout for this urlopen call.

    @return: The response of the urlopen call.

    @raises: error.TimeoutException when a socket timeout occurs.
             urllib2.URLError for errors that not caused by timeout.
             urllib2.HTTPError for errors like 404 url not found.
    """
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        return urllib2.urlopen(url, data=data)
    except urllib2.URLError as e:
        # isinstance also covers subclasses of socket.timeout, and getattr
        # tolerates error objects that carry no 'reason' attribute.
        if isinstance(getattr(e, 'reason', None), socket.timeout):
            raise error.TimeoutException(str(e))
        raise
    except socket.timeout as e:
        # A timeout while reading the response can surface without being
        # wrapped in a URLError; convert it as well.
        raise error.TimeoutException(str(e))
    finally:
        socket.setdefaulttimeout(old_timeout)