| #!/usr/bin/python | 
 | # | 
 | # Copyright (c) 2015 The Chromium OS Authors. All rights reserved. | 
 | # Use of this source code is governed by a BSD-style license that can be | 
 | # found in the LICENSE file. | 
 |  | 
 |  | 
 | """Tool for cleaning up labels that are not in use. | 
 |  | 
 | Delete given labels from database when they are not in use. | 
 | Labels that match the query `SELECT_USED_LABELS_FORMAT` are considered in use. | 
 | When given labels are not in the used labels, those labels are deleted. | 
 |  | 
 | For example, following command deletes all labels whose name begins with | 
 | 'cros-version' and are not in use. | 
 |  | 
 | ./label_cleaner.py -p cros-version | 
 |  | 
 | If '-p' option is not given, we delete labels whose name is exactly | 
 | 'cros-version' and are not in use. | 
 | """ | 
 |  | 
 |  | 
 | import argparse | 
 | import logging | 
 | import os | 
 | import socket | 
 | import sys | 
 | import tempfile | 
 |  | 
 | import common | 
 | # Installed via build_externals, must be after import common. | 
 | import MySQLdb | 
 | from autotest_lib.client.common_lib import global_config | 
 | from autotest_lib.client.common_lib import logging_config | 
 | from autotest_lib.server import frontend | 
 | from chromite.lib import metrics | 
 | from chromite.lib import ts_mon_config | 
 |  | 
 |  | 
 | _METRICS_PREFIX = 'chromeos/autotest/afe_db/admin/label_cleaner' | 
 |  | 
 | GLOBAL_AFE = global_config.global_config.get_config_value( | 
 |         'SERVER', 'global_afe_hostname') | 
 | DB_SERVER = global_config.global_config.get_config_value('AUTOTEST_WEB', 'host') | 
 | USER = global_config.global_config.get_config_value('AUTOTEST_WEB', 'user') | 
 | PASSWD = global_config.global_config.get_config_value( | 
 |         'AUTOTEST_WEB', 'password') | 
 | DATABASE = global_config.global_config.get_config_value( | 
 |         'AUTOTEST_WEB', 'database') | 
 | RESPECT_STATIC_LABELS = global_config.global_config.get_config_value( | 
 |         'SKYLAB', 'respect_static_labels', type=bool, default=False) | 
 |  | 
 | # Per-prefix metrics are generated only for the following prefixes. This | 
 | # whitelist is a second level defence against populating the 'label_prefix' | 
 | # field with arbitrary values provided on the commandline. | 
 | _LABEL_PREFIX_METRICS_WHITELIST = ( | 
 |         'cros-version', | 
 |         'fwro-version', | 
 |         'fwrw-version', | 
 |         'pool', | 
 | ) | 
 |  | 
 | SELECT_USED_LABELS_FORMAT = """ | 
 | SELECT DISTINCT(label_id) FROM afe_autotests_dependency_labels UNION | 
 | SELECT DISTINCT(label_id) FROM afe_hosts_labels UNION | 
 | SELECT DISTINCT(label_id) FROM afe_jobs_dependency_labels UNION | 
 | SELECT DISTINCT(label_id) FROM afe_shards_labels UNION | 
 | SELECT DISTINCT(label_id) FROM afe_parameterized_jobs UNION | 
 | SELECT DISTINCT(meta_host) FROM afe_host_queue_entries | 
 | """ | 
 |  | 
 | SELECT_REPLACED_LABELS = """ | 
 | SELECT label_id FROM afe_replaced_labels | 
 | """ | 
 |  | 
 | DELETE_LABELS_FORMAT = """ | 
 | DELETE FROM afe_labels WHERE id in (%s) | 
 | """ | 
 |  | 
 |  | 
 | def get_used_labels(conn): | 
 |     """Get labels that are currently in use. | 
 |  | 
 |     @param conn: MySQLdb Connection object. | 
 |  | 
 |     @return: A list of label ids. | 
 |     """ | 
 |     cursor = conn.cursor() | 
 |     sql = SELECT_USED_LABELS_FORMAT | 
 |     logging.debug('Running: %r', sql) | 
 |     cursor.execute(sql) | 
 |     rows = cursor.fetchall() | 
 |     return set(r[0] for r in rows) | 
 |  | 
 |  | 
 | def fetch_labels(conn, label=None, prefix=False): | 
 |     """Fetch labels from database. | 
 |  | 
 |     @param conn: MySQLdb Connection object. | 
 |     @param label: (optional) Label name to fetch. | 
 |     @param prefix: If True, use `label` as a prefix. Otherwise, fetch | 
 |                    labels whose name is exactly same as `label`. | 
 |  | 
 |     @return: A list of label ids. | 
 |     """ | 
 |     cursor = conn.cursor() | 
 |     if label is not None: | 
 |         if prefix: | 
 |             sql = 'SELECT id FROM afe_labels WHERE name LIKE "%s%%"' % label | 
 |         else: | 
 |             sql = 'SELECT id FROM afe_labels WHERE name = "%s"' % label | 
 |     else: | 
 |         sql = 'SELECT id FROM afe_labels' | 
 |     logging.debug('Running: %r', sql) | 
 |     cursor.execute(sql) | 
 |     rows = cursor.fetchall() | 
 |     # Don't delete labels whose replaced_by_static_label=True, since they're | 
 |     # actually maintained by afe_static_labels, not afe_labels. | 
 |     if not RESPECT_STATIC_LABELS: | 
 |         return set(r[0] for r in rows) | 
 |     else: | 
 |         cursor.execute(SELECT_REPLACED_LABELS) | 
 |         replaced_labels = cursor.fetchall() | 
 |         replaced_label_ids = set([r[0] for r in replaced_labels]) | 
 |         return set(r[0] for r in rows) - replaced_label_ids | 
 |  | 
 |  | 
 | def _delete_labels(conn, labels, dry_run): | 
 |     """Helper function of `delete_labels`.""" | 
 |     labels_str = ','.join([str(l) for l in labels]) | 
 |     sql = DELETE_LABELS_FORMAT % labels_str | 
 |     if dry_run: | 
 |         logging.info('[DRY RUN] Would have run: %r', sql) | 
 |     else: | 
 |         logging.debug('Running: %r', sql) | 
 |         conn.cursor().execute(sql) | 
 |         conn.commit() | 
 |  | 
 |  | 
 | def delete_labels(conn, labels, max_delete, dry_run=False): | 
 |     """Delete given labels from database. | 
 |  | 
 |     @param conn: MySQLdb Connection object. | 
 |     @param labels: iterable of labels to delete. | 
 |     @param max_delete: Max number of records to delete in a query. | 
 |     @param dry_run: (Boolean) Whether this is a dry run. | 
 |     """ | 
 |     while labels: | 
 |         chunk = labels[:max_delete] | 
 |         labels = labels[max_delete:] | 
 |         _delete_labels(conn, chunk, dry_run) | 
 |  | 
 |  | 
 | def is_primary_server(): | 
 |     """Check if this server's status is primary | 
 |  | 
 |     @return: True if primary, False otherwise. | 
 |     """ | 
 |     server = frontend.AFE(server=GLOBAL_AFE).run( | 
 |             'get_servers', hostname=socket.getfqdn()) | 
 |     if server and server[0]['status'] == 'primary': | 
 |         return True | 
 |     return False | 
 |  | 
 |  | 
 | def clean_labels(options): | 
 |     """Cleans unused labels from AFE database""" | 
 |     msg = 'Label cleaner starts. Will delete ' | 
 |     if options.prefix: | 
 |         msg += 'all labels whose prefix is "%s".' | 
 |     else: | 
 |         msg += 'a label "%s".' | 
 |     logging.info(msg, options.label) | 
 |     logging.info('Target database: %s.', options.db_server) | 
 |     if options.check_status and not is_primary_server(): | 
 |         raise Exception('Cannot run in a non-primary server') | 
 |  | 
 |     conn = MySQLdb.connect( | 
 |             host=options.db_server, | 
 |             user=options.db_user, | 
 |             passwd=options.db_password, | 
 |             db=DATABASE, | 
 |     ) | 
 |  | 
 |     all_labels = fetch_labels(conn) | 
 |     logging.info('Found total %d labels', len(all_labels)) | 
 |     metrics.Gauge(_METRICS_PREFIX + '/total_labels_count').set( | 
 |             len(all_labels), | 
 |             fields={ | 
 |                     'target_db': options.db_server, | 
 |                     'label_prefix': '', | 
 |             }, | 
 |     ) | 
 |  | 
 |     labels = fetch_labels(conn, options.label, options.prefix) | 
 |     logging.info('Found total %d labels matching %s', len(labels), | 
 |                  options.label) | 
 |     if options.prefix and options.label in _LABEL_PREFIX_METRICS_WHITELIST: | 
 |         metrics.Gauge(_METRICS_PREFIX + '/total_labels_count').set( | 
 |                 len(labels), | 
 |                 fields={ | 
 |                         'target_db': options.db_server, | 
 |                         'label_prefix': options.label, | 
 |                 }, | 
 |         ) | 
 |  | 
 |     used_labels = get_used_labels(conn) | 
 |     logging.info('Found %d labels are used', len(used_labels)) | 
 |     metrics.Gauge(_METRICS_PREFIX + '/used_labels_count').set( | 
 |             len(used_labels), fields={'target_db': options.db_server}) | 
 |  | 
 |     to_delete = list(labels - used_labels) | 
 |     logging.info('Deleting %d unused labels', len(to_delete)) | 
 |     delete_labels(conn, to_delete, options.max_delete, options.dry_run) | 
 |     metrics.Counter(_METRICS_PREFIX + '/labels_deleted').increment_by( | 
 |             len(to_delete), fields={'target_db': options.db_server}) | 
 |  | 
 |  | 
 | def main(): | 
 |     """Cleans unused labels from AFE database""" | 
 |     parser = argparse.ArgumentParser( | 
 |             formatter_class=argparse.ArgumentDefaultsHelpFormatter) | 
 |     parser.add_argument( | 
 |             '--db', | 
 |             dest='db_server', | 
 |             help='Database server', | 
 |             default=DB_SERVER, | 
 |     ) | 
 |     parser.add_argument( | 
 |             '--db-user', | 
 |             dest='db_user', | 
 |             help='Database user', | 
 |             default=USER, | 
 |     ) | 
 |     parser.add_argument( | 
 |             '--db-password', | 
 |             dest='db_password', | 
 |             help='Database password', | 
 |             default=PASSWD, | 
 |     ) | 
 |     parser.add_argument( | 
 |             '-p', | 
 |             dest='prefix', | 
 |             action='store_true', | 
 |             help=('Use argument <label> as a prefix for matching. ' | 
 |                   'For example, when the argument <label> is "cros-version" ' | 
 |                   'and this option is enabled, then labels whose name ' | 
 |                   'beginning with "cros-version" are matched. When this ' | 
 |                   'option is disabled, we match labels whose name is ' | 
 |                   'exactly same as the argument <label>.'), | 
 |     ) | 
 |     parser.add_argument( | 
 |             '-n', | 
 |             dest='max_delete', | 
 |             type=int, | 
 |             help='Max number of records to delete in each query.', | 
 |             default=100, | 
 |     ) | 
 |     parser.add_argument( | 
 |             '-s', | 
 |             dest='check_status', | 
 |             action='store_true', | 
 |             help='Enforce to run only in a server that has primary status', | 
 |     ) | 
 |     parser.add_argument( | 
 |             '--dry-run', | 
 |             dest='dry_run', | 
 |             action='store_true', | 
 |             help='Dry run mode. Do not actually delete any labels.', | 
 |     ) | 
 |     parser.add_argument('label', help='Label name to delete') | 
 |     options = parser.parse_args() | 
 |  | 
 |     logging_config.LoggingConfig().configure_logging( | 
 |             datefmt='%Y-%m-%d %H:%M:%S', | 
 |             verbose=True) | 
 |  | 
 |     if options.dry_run: | 
 |         tfd, metrics_file=tempfile.mkstemp() | 
 |         os.close(tfd) | 
 |         ts_mon_context = ts_mon_config.SetupTsMonGlobalState( | 
 |                 'afe_label_cleaner', | 
 |                 auto_flush=False, | 
 |                 debug_file=metrics_file, | 
 |         ) | 
 |     else: | 
 |         ts_mon_context = ts_mon_config.SetupTsMonGlobalState( | 
 |                 'afe_label_cleaner', | 
 |                 auto_flush=False, | 
 |         ) | 
 |     with ts_mon_context: | 
 |         try: | 
 |             clean_labels(options) | 
 |         except: | 
 |             metrics.Counter(_METRICS_PREFIX + '/tick').increment( | 
 |                     fields={'target_db': options.db_server, | 
 |                             'success': False}) | 
 |             raise | 
 |         else: | 
 |             metrics.Counter(_METRICS_PREFIX + '/tick').increment( | 
 |                     fields={'target_db': options.db_server, | 
 |                             'success': True}) | 
 |         finally: | 
 |             metrics.Flush() | 
 |             if options.dry_run: | 
 |                 logging.info('Dumped ts_mon metrics to %s', metrics_file) | 
 |  | 
 |  | 
 | if __name__ == '__main__': | 
 |     sys.exit(main()) |