| #!/usr/bin/env python3 |
| # -*- coding: utf-8 -*- |
| # Copyright 2019 The ChromiumOS Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Download profdata from different arches, merge them and upload to gs. |
| |
| The script is used for updating the PGO profiles for LLVM. The workflow |
| is that the script will download profdata from different PGO builds, merge |
| them and then upload it to a gs location that LLVM can access. |
| |
| The simplest way of using this script, is to run: |
| ./merge_profdata_and_upload.py --all_latest_profiles |
| which will automatically grab profdata from latest PGO generate builders |
| for three different architectures and merge them. LLVM hash is also |
| detected automatically from the artifacts. |
| |
| If you want to specify certain llvm hash, run it with: |
| ./merge_profdata_and_upload.py --all_latest_profiles --llvm_hash LLVM_HASH |
| Note that hash checking will fail if the llvm hash you provided is not the |
| same as those in artifacts, or llvm hash in different artifacts are not the |
| same. |
| |
| To only use profiles from buildbucket tasks for PGO generate, run it with: |
| ./merge_profdata_and_upload.py -b amd64/bb_id1 -b arm/bb_id2 ... |
| The buildbucket id can be found using `bb ls` command after manually launched |
| builder finishes. |
| |
| There is a chance that builders only succeeded partially, in this case, you |
| can run this script to merge both profdata from builder scheduled and manually |
| launched: |
| ./merge_profdata_and_upload.py -l arm -l amd64 -b arm64/bb_id |
| In this example, the script will merge profdata from arm and amd64 builder, and |
| profdata from an arm64 buildbucket task. |
| """ |
| |
| |
| import argparse |
| import collections |
| import distutils.spawn |
| import json |
| import os |
| import os.path |
| import shutil |
| import subprocess |
| import sys |
| import tempfile |
| |
| |
# Path to the llvm-profdata tool inside the ChromeOS chroot; used to merge
# the per-arch profile files into one combined PGO profile.
_LLVM_PROFDATA = "/usr/bin/llvm-profdata"
# URL scheme prefix shared by every Google Storage location in this script.
_GS_PREFIX = "gs://"

# Metadata attached to each PGO profile artifact. `head_sha` is the LLVM
# commit hash recorded in the artifact's llvm_metadata.json file.
_LLVMMetadata = collections.namedtuple("_LLVMMetadata", ["head_sha"])
| |
| |
def _fetch_gs_artifact(remote_name, local_name):
    """Copy a single file from a remote gs location to a local path.

    Args:
        remote_name: full gs location to the file.
        local_name: the name of local file to be copied to.
    """
    assert remote_name.startswith(_GS_PREFIX)
    fetch_cmd = ["gsutil", "cp", remote_name, local_name]
    subprocess.check_call(fetch_cmd)
| |
| |
def _get_gs_profdata(remote_profdata, arch):
    """Fetch and extract profdata from remote gs location.

    Args:
        remote_profdata: remote gs location of the profdata tarball.
        arch: directory named with arch to separate each profdata.

    Returns:
        Local location of the extracted profdata.

    Raises:
        RuntimeError: if the tarball does not contain a profdata file.
    """
    tar = "llvm_profdata.tar.xz"
    _fetch_gs_artifact(remote_profdata, tar)

    # `tar -xvf` prints the name of each extracted member. The tarball is
    # expected to contain exactly one profdata file, so the stripped output
    # is that file's name. Decode as text (consistent with the other
    # subprocess calls in this file) instead of comparing raw bytes.
    extract_cmd = ["tar", "-xvf", tar]
    profdata_name = subprocess.check_output(
        extract_cmd, encoding="utf-8"
    ).strip()
    if ".llvm.profdata" not in profdata_name:
        raise RuntimeError("No profdata in the tarball: %s" % remote_profdata)

    # Move the profdata into a per-arch directory so that profiles fetched
    # for different architectures never overwrite each other.
    os.mkdir(arch)
    profdata_loc = os.path.join(arch, "llvm.profdata")
    os.rename(profdata_name, profdata_loc)
    print("Profdata extracted to: %s" % profdata_loc)
    return profdata_loc
| |
| |
def _get_gs_metadata(remote_metadata):
    """Fetch metadata from remote gs location and read the LLVM head_sha.

    Args:
        remote_metadata: remote gs location of the metadata json file.

    Returns:
        LLVM head_sha metadata
    """
    local_json = "llvm_metadata.json"
    _fetch_gs_artifact(remote_metadata, local_json)

    with open(local_json) as metadata_file:
        parsed = json.load(metadata_file)

    return _LLVMMetadata(head_sha=parsed["head_sha"])
| |
| |
def _find_latest_artifacts(gs_url, arch):
    """Fetch the latest profdata and metadata from a given gs location.

    Args:
        gs_url: a gs location containing one or more artifacts to fetch.
        arch: the arch profdata collected from.

    Returns:
        A tuple of (metadata, local profdata location).

    Raises:
        RuntimeError: if the location cannot be listed, or if no profdata
            or metadata artifact is found there.
    """
    assert gs_url.startswith(_GS_PREFIX)
    try:
        # List all artifacts in the gs location and sort newest-first by
        # the timestamp column of `gsutil ls -l`.
        output = (
            subprocess.check_output(
                ["gsutil", "ls", "-l", gs_url], encoding="utf-8"
            )
            .strip()
            .split("\n")
        )
        lines = sorted(output, key=lambda x: x.split()[1], reverse=True)
    except subprocess.CalledProcessError:
        raise RuntimeError("Artifacts not found: %s" % gs_url)

    # Use a loop to go through all artifacts to find the latest profdata.
    # An example of the output of latest builder bucket:
    # pylint: disable=line-too-long
    #    5006528  2020-05-31T10:08:48Z  gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
    #         56  2020-05-31T10:08:48Z  gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
    #    5005952  2020-05-24T10:53:34Z  gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r5-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
    #         56  2020-05-24T10:53:34Z  gs://chromeos-toolchain-artifacts/llvm-pgo/arm/llvm-11.0_pre387436_p20200403-r5-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
    # An example for the lines of buildbucket location:
    #    5004260  2020-05-29T09:48:04Z  gs://chromeos-image-archive/arm-pgo-generate-llvm-next-toolchain/R85-13254.0.0-1-8879010326583123168/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm.profdata.tar.xz
    #         56  2020-05-29T09:48:04Z  gs://chromeos-image-archive/arm-pgo-generate-llvm-next-toolchain/R85-13254.0.0-1-8879010326583123168/llvm-11.0_pre387436_p20200403-r7-a8e5dcb072b1f794883ae8125fb08c06db678d56.llvm_metadata.json
    # pylint: enable=line-too-long
    #
    # NOTE: profile_path must be initialized before the loop; previously it
    # was only assigned inside the loop, so reaching the check below with no
    # matching artifact raised UnboundLocalError instead of the intended
    # RuntimeError.
    profile_path = None
    profdata_url = ""
    for line in lines:
        url = line.split()[-1]
        if ".llvm.profdata.tar.xz" in url:
            profile_path = _get_gs_profdata(url, arch)
            profdata_url = url
            break
    if not profile_path or not profdata_url:
        raise RuntimeError("No profdata found from %s" % gs_url)

    # The metadata json sits next to the profdata tarball and shares its
    # name apart from the suffix.
    metadata_url = profdata_url.replace(
        ".llvm.profdata.tar.xz", ".llvm_metadata.json"
    )
    metadata = _get_gs_metadata(metadata_url)
    if not metadata:
        raise RuntimeError("No metadata found from %s" % gs_url)
    return metadata, profile_path
| |
| |
def _fetch_from_latest(arch):
    """Fetch artifacts from latest builders.

    Args:
        arch: the arch profdata collected from.

    Returns:
        A tuple of local profdata location and metadata
    """
    print("\nFETCHING LATEST PROFDATA ON %s..." % arch.upper())
    # Latest-builder artifacts live under a fixed per-arch bucket path.
    gs_url = _GS_PREFIX + "chromeos-toolchain-artifacts/llvm-pgo/" + arch
    return _find_latest_artifacts(gs_url, arch)
| |
| |
def _fetch_from_buildbucket(arch, bb):
    """Fetch artifacts from buildbucket task.

    Args:
        arch: the arch profdata collected from.
        bb: buildbucket id.

    Returns:
        A tuple of local profdata location and metadata

    Raises:
        RuntimeError: if no bucket matches the given buildbucket id.
    """
    print("\nFETCHING BUILDBUCKET PROFDATA ON %s..." % arch.upper())
    remote_arch = (
        _GS_PREFIX
        + "chromeos-image-archive/"
        + arch
        + "-pgo-generate-llvm-next-toolchain"
    )
    # List all buckets under {arch}-pgo-generate-llvm-next-toolchain and
    # scan for the one whose name contains the buildbucket id.
    listing = subprocess.check_output(
        ["gsutil", "ls", remote_arch], encoding="utf-8"
    )
    for candidate in listing.strip().split("\n"):
        if bb in candidate:
            return _find_latest_artifacts(candidate, arch)
    raise RuntimeError(
        "No matched results found in %s with bb: %s" % (arch, bb)
    )
| |
| |
def _merge_profdata(profdata_list, output_name):
    """Merge profdata.

    Args:
        profdata_list: list of profdata location of each arch.
        output_name: name of merged profdata.
    """
    merge_cmd = [_LLVM_PROFDATA, "merge", "-output", output_name]
    merge_cmd.extend(profdata_list)
    print("\nMerging PGO profiles.\nCMD: %s" % merge_cmd)
    subprocess.check_call(merge_cmd)
| |
| |
def _tar_and_upload_profdata(profdata, name_suffix):
    """Create a tarball of merged profdata and upload to certain gs location.

    Args:
        profdata: location of merged profdata.
        name_suffix: usually the LLVM head_sha.

    Raises:
        ValueError: if an object already exists at the upload location.
    """
    tarball = "llvm-profdata-%s.tar.xz" % name_suffix
    print("Making profdata tarball: %s" % tarball)
    tar_cmd = ["tar", "--sparse", "-I", "xz", "-cf", tarball, profdata]
    subprocess.check_call(tar_cmd)

    upload_location = "%schromeos-localmirror/distfiles/%s" % (
        _GS_PREFIX,
        tarball,
    )

    # TODO: it's better to create a subdir: distfiles/llvm_pgo_profile, but
    # now llvm could only recognize distfiles.
    # `-n` makes gsutil skip (not overwrite) objects that already exist.
    upload_cmd = [
        "gsutil",
        "-m",
        "cp",
        "-n",
        "-a",
        "public-read",
        tarball,
        upload_location,
    ]
    print("\nUploading tarball to gs.\nCMD: %s\n" % upload_cmd)

    # gsutil prints all status to stderr, oddly enough.
    gsutil_output = subprocess.check_output(
        upload_cmd, stderr=subprocess.STDOUT, encoding="utf-8"
    )

    # gsutil exits successfully even if it uploaded nothing; a skipped
    # (already-existing) object only shows up in its summary text, so scan
    # the output to detect it.
    if "Skipping existing item" in gsutil_output:
        raise ValueError(
            "Profile upload failed: would overwrite an existing "
            "profile at %s" % upload_location
        )
| |
| |
def _parse_args():
    """Parse and validate command line arguments.

    Returns:
        A tuple of (parsed args, list of arches to fetch from the latest
        builders, list of (arch, buildbucket_id) pairs).
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-a",
        "--all_latest_profiles",
        action="store_true",
        help="Merge and upload profiles from the latest builders.",
    )
    parser.add_argument(
        "-l",
        "--latest",
        default=[],
        action="append",
        help="User can specify the profdata from which builder with specific "
        "architecture to download. By default, we merge profdata from arm, "
        "arm64, amd64.",
    )
    parser.add_argument(
        "-b",
        "--buildbucket",
        default=[],
        action="append",
        help="Extra pgo-generate-llvm-next-toolchain buildbucket results to be "
        "used. Format should be: {arch}/{bb_id}.",
    )
    parser.add_argument(
        "-o",
        "--output",
        default="llvm.profdata",
        help="Where to put merged PGO profile. The default is to not save it "
        "anywhere.",
    )
    parser.add_argument(
        "--llvm_hash",
        help="The LLVM hash to select for the profiles. Generally autodetected.",
    )
    args = parser.parse_args()

    if not args.all_latest_profiles and not (args.latest or args.buildbucket):
        parser.error(
            "Please specify whether to use latest profiles or "
            "profiles from buildbucket"
        )

    if args.all_latest_profiles and (args.latest or args.buildbucket):
        parser.error(
            "--all_latest_profiles cannot be specified together "
            "with --latest or --buildbucket"
        )

    latest = (
        ["arm", "arm64", "amd64"] if args.all_latest_profiles else args.latest
    )

    # Collect every requested arch (latest builders plus buildbucket tasks)
    # so duplicates can be rejected: merging two profiles for the same arch
    # would double-count that arch's profile data.
    all_arch_list = list(latest)
    arch_bb_list = []
    for arch_bb in args.buildbucket:
        arch, bb = arch_bb.split("/")
        arch_bb_list.append((arch, bb))
        all_arch_list.append(arch)

    if len(set(all_arch_list)) != len(all_arch_list):
        parser.error("Each arch can be only passed once.")

    return args, latest, arch_bb_list


def main():
    """Fetch, sanity-check, merge, and upload PGO profiles for LLVM."""
    args, latest, arch_bb_list = _parse_args()

    # distutils is deprecated (PEP 632, removed in Python 3.12);
    # shutil.which is the stdlib replacement for
    # distutils.spawn.find_executable.
    if not shutil.which(_LLVM_PROFDATA):
        sys.exit(_LLVM_PROFDATA + " not found; are you in the chroot?")

    initial_dir = os.getcwd()
    temp_dir = tempfile.mkdtemp(prefix="merge_pgo")
    success = True
    try:
        # All artifacts are downloaded and extracted into a temp directory,
        # which is kept around on failure for debugging.
        os.chdir(temp_dir)
        profdata_list = []
        heads = set()

        def append_artifacts(fetched_tuple):
            # Record one fetched (metadata, profdata) pair, rejecting
            # profiles that are suspiciously small (likely truncated or
            # from a broken build).
            llvm_metadata, profdata_loc = fetched_tuple
            if os.path.getsize(profdata_loc) < 512 * 1024:
                raise RuntimeError(
                    "The PGO profile in local path %s is suspiciously "
                    "small. Something might have gone "
                    "wrong." % profdata_loc
                )
            heads.add(llvm_metadata.head_sha)
            profdata_list.append(profdata_loc)

        for arch in latest:
            append_artifacts(_fetch_from_latest(arch))

        for arch, bb in arch_bb_list:
            append_artifacts(_fetch_from_buildbucket(arch, bb))

        assert heads, "Didn't fetch anything?"

        def die_with_head_complaint(complaint):
            extra = " (HEADs found: %s)" % sorted(heads)
            raise RuntimeError(complaint.rstrip() + extra)

        # Autodetect the LLVM hash from the artifacts unless the user
        # supplied one; either way, all artifacts must agree with it.
        llvm_hash = args.llvm_hash
        if not llvm_hash:
            if len(heads) != 1:
                die_with_head_complaint(
                    "%d LLVM HEADs were found, which is more than one. You probably "
                    "want a consistent set of HEADs for a profile. If you know you "
                    "don't, please specify --llvm_hash, and note that *all* profiles "
                    "will be merged into this final profile, regardless of their "
                    "reported HEAD." % len(heads)
                )
            (llvm_hash,) = heads

        if llvm_hash not in heads:
            assert llvm_hash == args.llvm_hash
            die_with_head_complaint(
                "HEAD %s wasn't found in any fetched artifacts." % llvm_hash
            )

        print("\nUsing LLVM hash: %s" % llvm_hash)

        _merge_profdata(profdata_list, args.output)
        print("Merged profdata locates at %s" % os.path.abspath(args.output))
        _tar_and_upload_profdata(args.output, name_suffix=llvm_hash)
        print("\nMerged profdata uploaded successfully.")
    except BaseException:
        # Flag the failure so the temp directory is preserved, then let the
        # exception propagate (also covers KeyboardInterrupt/SystemExit).
        success = False
        raise
    finally:
        os.chdir(initial_dir)
        if success:
            print("Clearing temp directory.")
            shutil.rmtree(temp_dir, ignore_errors=True)
        else:
            print("Script fails, temp directory is at: %s" % temp_dir)
| |
| |
# main() returns None on success, so sys.exit(None) yields exit status 0.
if __name__ == "__main__":
    sys.exit(main())