blob: 0a7d8ac29c6b88c027d786d051e371d85a31d3d0 [file] [log] [blame]
from __future__ import print_function
import argparse
import multiprocessing.pool
import subprocess
import sys
import common
from autotest_lib.server import frontend
from autotest_lib.site_utils.lib import infra
DEPLOY_PRODUCTION_LOCAL = ('/usr/local/autotest/site_utils/'
PUSH_ORDER = {'database': 0,
'drone': 1,
'shard': 1,
'golo_proxy': 1,
'scheduler': 2,
'host_scheduler': 2,
'suite_scheduler': 2}
def discover_servers(afe, server_filter=set()):
"""Discover the in-production servers to update.
@param afe: Server to contact with RPC requests.
@param server_filter: A set of servers to get status for.
@returns: A list of a list of tuple of (server_name, server_status, roles).
The list is sorted by the order to be updated. Servers in the same
sublist can be pushed together.
# Example server details....
# {
# 'hostname': 'server1',
# 'status': 'backup',
# 'roles': ['drone', 'scheduler'],
# 'attributes': {'max_processes': 300}
# }
rpc = frontend.AFE(server=afe)
servers ='get_servers')
# Do not update servers that need repair, and filter the server list by
# given server_filter if needed.
servers = [s for s in servers
if (s['status'] != 'repair_required' and
(not server_filter or s['hostname'] in server_filter))]
# Do not update reserve, devserver or crash_server (not YET supported).
servers = [s for s in servers if 'devserver' not in s['roles'] and
'crash_server' not in s['roles'] and
'reserve' not in s['roles']]
sorted_servers = []
for i in range(max(PUSH_ORDER.values()) + 1):
servers_with_unknown_order = []
for server in servers:
info = (server['hostname'], server['status'], server['roles'])
order = min([PUSH_ORDER[r] for r in server['roles']
if r in PUSH_ORDER])
except ValueError:
# All roles are not indexed in PUSH_ORDER.
# Push all servers with unknown roles together.
if servers_with_unknown_order:
found_servers = set([s['hostname'] for s in servers])
# Inject the servers passed in by user but not found in server database.
extra_servers = []
for server in server_filter - found_servers:
extra_servers.append((server, 'unknown', ['unknown']))
if extra_servers:
return sorted_servers
def parse_arguments(args):
"""Parse command line arguments.
@param args: The command line arguments to parse. (usually sys.argv[1:])
@returns An argparse.Namespace populated with argument values.
parser = argparse.ArgumentParser(
description='Command to update an entire autotest installation.',
epilog=('Update all servers:\n'
'Update one server:\n'
' <server>\n'
'Send arguments to remote\n'
' -- --dryrun\n'
'See what arguments would be run on specified servers:\n'
' --dryrun <server_a> <server_b> --'
' --skip-update\n'))
parser.add_argument('-v', '--verbose', action='store_true', dest='verbose',
help='Log all deploy script output.')
parser.add_argument('--continue', action='store_true', dest='cont',
help='Continue to the next server on failure.')
parser.add_argument('--afe', default='cautotest',
help='What is the main server for this installation? (cautotest).')
parser.add_argument('--dryrun', action='store_true',
help='Don\'t actually run remote commands.')
parser.add_argument('args', nargs=argparse.REMAINDER,
help=('<server>, <server> ... -- <remote_arg>, <remote_arg> ...'))
results = parser.parse_args(args)
# We take the args list and further split it down. Everything before --
# is a server name, and everything after it is an argument to pass along
# to
# This:
# server_a, server_b -- --dryrun --skip-report
# Becomes:
# args.servers['server_a', 'server_b']
# args.args['--dryrun', '--skip-report']
local_args_index = results.args.index('--') + 1
except ValueError:
# If -- isn't present, they are all servers.
results.servers = results.args
results.args = []
# Split arguments.
results.servers = results.args[:local_args_index-1]
results.args = results.args[local_args_index:]
return results
def update_server(inputs):
"""Deploy for given server.
@param inputs: Inputs for the update action, including:
server: Name of the server to update.
status: Status of the server.
options: Options for the update.
@return: A tuple of (server, success, output), where:
server: Name of the server to be updated.
sucess: True if update succeeds, False otherwise.
output: A string of the deploy_production_local script output
including any errors.
server = inputs['server']
status = inputs['status']
options = inputs['options']
print('Updating server %s...' % server)
if status == 'backup':
extra_args = ['--skip-service-status']
extra_args = []
cmd = ('%s %s' %
(DEPLOY_PRODUCTION_LOCAL, ' '.join(options.args + extra_args)))
output = '%s: %s' % (server, cmd)
success = True
if not options.dryrun:
output = infra.execute_command(server, cmd)
except subprocess.CalledProcessError as e:
success = False
output = e.output
return server, success, output
def update_in_parallel(servers, options):
"""Update a group of servers in parallel.
Exit the process with error if any server failed to be updated and
options.cont is not set.
@param servers: A list of tuple of (server_name, server_status, roles).
@param options: Options for the push.
args = []
for server, status, _ in servers:
args.append({'server': server,
'status': status,
'options': options})
# The update actions run in parallel. If any update failed, we should wait
# for other running updates being finished. Abort in the middle of an update
# may leave the server in a bad state.
pool = multiprocessing.pool.ThreadPool(POOL_SIZE)
failed_servers = []
results = pool.imap_unordered(update_server, args)
for server, success, output in results:
if options.dryrun:
print('Dry run, updating server %s is skipped.' % server)
elif success:
print('Successfully updated server %s.' % server)
if options.verbose:
error = ('Failed to update server %s.\nError: %s' %
(server, output))
if failed_servers and not options.cont:
print('Error! Failed to update following servers: %s' %
def update_group(servers, options):
"""Update a group of servers in parallel.
Exit the process with error if any server failed to be updated and
options.cont is not set.
@param servers: A list of tuple of (server_name, server_status, roles).
@param options: Options for the push.
# If it's allowed to continue updating even after some update fails, update
# all servers together.
if options.cont:
update_in_parallel(servers, options)
# Pick on server per role in the group to update first. Abort if any update
# failed.
server_per_role = {}
# Each server can be used to qualify only one role.
server_picked = set()
for server, status, roles in servers:
for role in roles:
if not role in server_per_role and not server in server_picked:
server_per_role[role] = (server, status, roles)
update_in_parallel(server_per_role.values(), options)
rest_servers = [s for s in servers if not s[0] in server_picked]
update_in_parallel(rest_servers, options)
def main(args):
"""Main routine that drives all the real work.
@param args: The command line arguments to parse. (usually sys.argv[1:])
@returns The system exit code.
options = parse_arguments(args)
print('Retrieving server status...')
sorted_servers = discover_servers(options.afe, set(options.servers or []))
# Display what we plan to update.
print('Will update (in this order):')
i = 1
for servers in sorted_servers:
print('%s Group %d (%d servers) %s' % ('='*30, i, len(servers), '='*30))
for server, status, roles in servers:
print('\t%-36s:\t%s\t%s' % (server, status, roles))
i += 1
for servers in sorted_servers:
update_group(servers, options)
if __name__ == '__main__':