| #!/usr/bin/env python3 |
| # -*- coding: utf-8 -*- |
| # Copyright 2019 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Download profdata from different arches, merge them and upload to gs. |
| |
| The script is used for updating the PGO profiles for LLVM. The workflow |
| is that the script will download profdata from different PGO builds, merge |
| them and then upload it to a gs location that LLVM can access. |
| |
| The simplest way of using this script, is to run: |
| ./merge_profdata_and_upload.py --all_latest_profiles |
| which will automatically grab profdata from latest PGO generate builders |
| for three different architectures and merge them. LLVM hash is also |
| detected automatically from the artifacts. |
| |
| If you want to specify certain llvm hash, run it with: |
| ./merge_profdata_and_upload.py --all_latest_profiles --llvm_hash LLVM_HASH |
| Note that hash checking will fail if the llvm hash you provided is not the |
| same as those in artifacts, or llvm hash in different artifacts are not the |
| same. |
| |
| To only use profiles from buildbucket tasks for PGO generate, run it with: |
| ./merge_profdata_and_upload.py -b amd64/bb_id1 -b arm/bb_id2 ... |
| The buildbucket id can be found using `bb ls` command after manually launched |
| builder finishes. |
| |
| There is a chance that builders only succeeded partially, in this case, you |
| can run this script to merge both profdata from builder scheduled and manually |
| launched: |
| ./merge_profdata_and_upload.py -l arm -l amd64 -b arm64/bb_id |
| In this example, the script will merge profdata from arm and amd64 builder, and |
| profdata from an arm64 buildbucket task. |
| """ |
| |
| from __future__ import print_function |
| |
| import argparse |
| import collections |
| import distutils.spawn |
| import json |
| import os |
| import os.path |
| import shutil |
| import subprocess |
| import sys |
| import tempfile |
| |
# Path of the llvm-profdata binary inside the ChromeOS chroot; used to merge
# the downloaded profiles.
_LLVM_PROFDATA = '/usr/bin/llvm-profdata'
# Scheme prefix shared by every Google Storage URL this script touches.
_GS_PREFIX = 'gs://'

# Metadata parsed from a *.llvm_metadata.json artifact. head_sha is the LLVM
# git commit the profile was collected against.
_LLVMMetadata = collections.namedtuple('_LLVMMetadata', ['head_sha'])
| |
| |
def _fetch_gs_artifact(remote_name, local_name):
  """Download one file from a remote gs location to a local path.

  Args:
    remote_name: full gs location to the file.
    local_name: the name of local file to be copied to.
  """
  assert remote_name.startswith(_GS_PREFIX)
  fetch_cmd = ['gsutil', 'cp', remote_name, local_name]
  subprocess.check_call(fetch_cmd)
| |
| |
def _get_gs_profdata(remote_profdata, arch):
  """Fetch and extract profdata from remote gs location.

  Args:
    remote_profdata: remote gs location of the profdata tarball.
    arch: directory named with arch to separate each profdata.

  Returns:
    Local location of the extracted profdata.

  Raises:
    RuntimeError: if the tarball does not contain a profdata file.
  """
  tar = 'llvm_profdata.tar.xz'
  _fetch_gs_artifact(remote_profdata, tar)
  extract_cmd = ['tar', '-xvf', tar]

  # Decode the output (encoding='utf-8') so the extracted file name is a str,
  # consistent with every other subprocess call in this script; the original
  # bytes result forced a b'...' membership check and mixed bytes/str paths.
  profdata_name = subprocess.check_output(extract_cmd, encoding='utf-8').strip()
  # The output of the `tar` command should only contain one line of the
  # extracted profdata name.
  if '.llvm.profdata' not in profdata_name:
    raise RuntimeError('No profdata in the tarball: %s' % remote_profdata)

  os.mkdir(arch)
  profdata_loc = os.path.join(arch, 'llvm.profdata')
  os.rename(profdata_name, profdata_loc)
  print('Profdata extracted to: %s' % profdata_loc)
  return profdata_loc
| |
| |
def _get_gs_metadata(remote_metadata):
  """Fetch metadata from remote gs location and read the LLVM head_sha.

  Args:
    remote_metadata: remote gs location of the metadata json file.

  Returns:
    LLVM head_sha metadata
  """
  local_json = 'llvm_metadata.json'
  _fetch_gs_artifact(remote_metadata, local_json)

  with open(local_json) as metadata_file:
    parsed = json.load(metadata_file)

  return _LLVMMetadata(head_sha=parsed['head_sha'])
| |
| |
def _find_latest_artifacts(gs_url, arch):
  """Fetch the latest profdata and metadata from a given gs location.

  Args:
    gs_url: a gs location containing one or more artifacts to fetch.
    arch: the arch profdata collected from.

  Returns:
    A tuple of local profdata location and metadata

  Raises:
    RuntimeError: if the location is unreadable, or no profdata/metadata
      artifact is found in it.
  """
  assert gs_url.startswith(_GS_PREFIX)
  try:
    # List all artifacts in the gs location and sort by time.
    output = subprocess.check_output(['gsutil', 'ls', '-l', gs_url],
                                     encoding='utf-8').strip().split('\n')
    lines = sorted(output, key=lambda x: x.split()[1], reverse=True)
  except subprocess.CalledProcessError:
    raise RuntimeError('Artifacts not found: %s' % gs_url)

  # Use a loop to go through all artifacts to find the latest profdata.
  # An example of the output of latest builder bucket:
  # pylint: disable=line-too-long
  # 5006528 2020-05-31T10:08:48Z gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
  # 56 2020-05-31T10:08:48Z gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
  # 5005952 2020-05-24T10:53:34Z gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r5-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
  # 56 2020-05-24T10:53:34Z gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r5-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
  # An example for the lines of buildbucket location:
  # 5004260 2020-05-29T09:48:04Z gs://chromeos-image-archive/arm-pgo-generate-llvm-next-toolchain/R85-13254.0.0-1-8879010326583123168/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
  # 56 2020-05-29T09:48:04Z gs://chromeos-image-archive/arm-pgo-generate-llvm-next-toolchain/R85-13254.0.0-1-8879010326583123168/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
  # pylint: enable=line-too-long
  # Initialize both names before the loop: if no profdata artifact matches,
  # `profile_path` would otherwise be unbound and the check below would raise
  # UnboundLocalError instead of the intended RuntimeError.
  profile_path = None
  profdata_url = ''
  for line in lines:
    url = line.split()[-1]
    if '.llvm.profdata.tar.xz' in url:
      profile_path = _get_gs_profdata(url, arch)
      profdata_url = url
      break
  if not profile_path or not profdata_url:
    raise RuntimeError('No profdata found from %s' % gs_url)

  metadata_url = profdata_url.replace('.llvm.profdata.tar.xz',
                                      '.llvm_metadata.json')
  metadata = _get_gs_metadata(metadata_url)
  if not metadata:
    raise RuntimeError('No metadata found from %s' % gs_url)
  return metadata, profile_path
| |
| |
def _fetch_from_latest(arch):
  """Fetch artifacts from latest builders.

  Args:
    arch: the arch profdata collected from.

  Returns:
    A tuple of local profdata location and metadata
  """
  print('\nFETCHING LATEST PROFDATA ON %s...' % arch.upper())
  remote_latest = '%schromeos-toolchain-artifacts/llvm-pgo/%s' % (_GS_PREFIX,
                                                                  arch)
  return _find_latest_artifacts(remote_latest, arch)
| |
| |
def _fetch_from_buildbucket(arch, bb):
  """Fetch artifacts from buildbucket task.

  Args:
    arch: the arch profdata collected from.
    bb: buildbucket id.

  Returns:
    A tuple of local profdata location and metadata

  Raises:
    RuntimeError: if no bucket matches the given buildbucket id.
  """
  print('\nFETCHING BUILDBUCKET PROFDATA ON %s...' % arch.upper())
  remote_arch = ('%schromeos-image-archive/%s-pgo-generate-llvm-next-toolchain'
                 % (_GS_PREFIX, arch))
  # List all buckets under {arch}-pgo-generate-llvm-next-toolchain and
  # grep with buildbucket id.
  listing = subprocess.check_output(['gsutil', 'ls', remote_arch],
                                    encoding='utf-8').strip().split('\n')
  matched = [url for url in listing if bb in url]
  if matched:
    return _find_latest_artifacts(matched[0], arch)
  raise RuntimeError('No matched results found in %s with bb: %s' % (arch, bb))
| |
| |
def _merge_profdata(profdata_list, output_name):
  """Merge profdata.

  Args:
    profdata_list: list of profdata location of each arch.
    output_name: name of merged profdata.
  """
  merge_cmd = [_LLVM_PROFDATA, 'merge', '-output', output_name]
  merge_cmd.extend(profdata_list)
  print('\nMerging PGO profiles.\nCMD: %s' % merge_cmd)
  subprocess.check_call(merge_cmd)
| |
| |
def _tar_and_upload_profdata(profdata, name_suffix):
  """Create a tarball of merged profdata and upload to certain gs location.

  Args:
    profdata: location of merged profdata.
    name_suffix: usually the LLVM head_sha.

  Raises:
    ValueError: if a profile already exists at the upload location (gsutil
      `cp -n` skips the copy rather than overwriting).
  """
  tarball = 'llvm-profdata-%s.tar.xz' % name_suffix
  print('Making profdata tarball: %s' % tarball)
  tar_cmd = ['tar', '--sparse', '-I', 'xz', '-cf', tarball, profdata]
  subprocess.check_call(tar_cmd)

  upload_location = '%schromeos-localmirror/distfiles/%s' % (_GS_PREFIX,
                                                             tarball)

  # TODO: it's better to create a subdir: distfiles/llvm_pgo_profile, but
  # now llvm could only recognize distfiles.
  upload_cmd = [
      'gsutil',
      '-m',
      'cp',
      '-n',
      '-a',
      'public-read',
      tarball,
      upload_location,
  ]
  print('\nUploading tarball to gs.\nCMD: %s\n' % upload_cmd)

  # gsutil prints all status to stderr, oddly enough.
  gs_output = subprocess.check_output(
      upload_cmd, stderr=subprocess.STDOUT, encoding='utf-8')

  # gsutil exits successfully even if it uploaded nothing. It prints a summary
  # of what all it did, though. Successful uploads are just a progress bar,
  # unsuccessful ones note that items were skipped.
  if 'Skipping existing item' in gs_output:
    raise ValueError('Profile upload failed: would overwrite an existing '
                     'profile at %s' % upload_location)
| |
| |
def main():
  """Parse arguments, fetch/merge profdata, and upload the merged profile."""
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      '-a',
      '--all_latest_profiles',
      action='store_true',
      help='Merge and upload profiles from the latest builders.')
  parser.add_argument(
      '-l',
      '--latest',
      default=[],
      action='append',
      help='User can specify the profdata from which builder with specific '
      'architecture to download. By default, we merge profdata from arm, '
      'arm64, amd64.')
  parser.add_argument(
      '-b',
      '--buildbucket',
      default=[],
      action='append',
      help='Extra pgo-generate-llvm-next-toolchain buildbucket results to be '
      'used. Format should be: {arch}/{bb_id}.')
  parser.add_argument(
      '-o',
      '--output',
      default='llvm.profdata',
      help='Where to put merged PGO profile. The default is to not save it '
      'anywhere.')
  parser.add_argument(
      '--llvm_hash',
      help='The LLVM hash to select for the profiles. Generally autodetected.')
  args = parser.parse_args()

  if not args.all_latest_profiles and not (args.latest or args.buildbucket):
    parser.error('Please specify whether to use latest profiles or '
                 'profiles from buildbucket')

  if args.all_latest_profiles and (args.latest or args.buildbucket):
    parser.error('--all_latest_profiles cannot be specified together '
                 'with --latest or --buildbucket')

  latest = (['arm', 'arm64', 'amd64']
            if args.all_latest_profiles else args.latest)

  all_arch_list = latest.copy()
  arch_bb_list = []
  if args.buildbucket:
    for arch_bb in args.buildbucket:
      arch, bb = arch_bb.split('/')
      arch_bb_list.append((arch, bb))
      all_arch_list.append(arch)

  if len(set(all_arch_list)) != len(all_arch_list):
    parser.error('Each arch can be only passed once.')

  # shutil.which replaces distutils.spawn.find_executable; distutils is
  # deprecated (PEP 632) and removed in Python 3.12.
  if not shutil.which(_LLVM_PROFDATA):
    sys.exit(_LLVM_PROFDATA + ' not found; are you in the chroot?')

  initial_dir = os.getcwd()
  temp_dir = tempfile.mkdtemp(prefix='merge_pgo')
  success = True
  try:
    os.chdir(temp_dir)
    profdata_list = []
    heads = set()

    # Record one (head_sha, profdata path) pair, rejecting implausibly small
    # profiles, which usually indicate a broken PGO-generate run.
    def append_artifacts(fetched_tuple):
      llvm_metadata, profdata_loc = fetched_tuple
      if os.path.getsize(profdata_loc) < 512 * 1024:
        raise RuntimeError('The PGO profile in local path %s is suspiciously '
                           'small. Something might have gone '
                           'wrong.' % profdata_loc)
      heads.add(llvm_metadata.head_sha)
      profdata_list.append(profdata_loc)

    for arch in latest:
      append_artifacts(_fetch_from_latest(arch))

    for arch, bb in arch_bb_list:
      append_artifacts(_fetch_from_buildbucket(arch, bb))

    assert heads, "Didn't fetch anything?"

    def die_with_head_complaint(complaint):
      extra = ' (HEADs found: %s)' % sorted(heads)
      raise RuntimeError(complaint.rstrip() + extra)

    llvm_hash = args.llvm_hash
    if not llvm_hash:
      if len(heads) != 1:
        die_with_head_complaint(
            '%d LLVM HEADs were found, which is more than one. You probably '
            'want a consistent set of HEADs for a profile. If you know you '
            "don't, please specify --llvm_hash, and note that *all* profiles "
            'will be merged into this final profile, regardless of their '
            'reported HEAD.' % len(heads))
      llvm_hash, = heads

    if llvm_hash not in heads:
      assert llvm_hash == args.llvm_hash
      die_with_head_complaint(
          "HEAD %s wasn't found in any fetched artifacts." % llvm_hash)

    print('\nUsing LLVM hash: %s' % llvm_hash)

    _merge_profdata(profdata_list, args.output)
    print('Merged profdata locates at %s' % os.path.abspath(args.output))
    _tar_and_upload_profdata(args.output, name_suffix=llvm_hash)
    print('\nMerged profdata uploaded successfully.')
  except BaseException:
    # Catch BaseException (not a bare `except:`) so KeyboardInterrupt etc.
    # still mark the run failed and preserve the temp dir for debugging.
    success = False
    raise
  finally:
    os.chdir(initial_dir)
    if success:
      print('Clearing temp directory.')
      shutil.rmtree(temp_dir, ignore_errors=True)
    else:
      print('Script fails, temp directory is at: %s' % temp_dir)
| |
| |
| if __name__ == '__main__': |
| sys.exit(main()) |