artifact_stages: upload LLVM's HEAD SHA with PGO artifacts

It's helpful if we have a small JSON file that describes the LLVM we
built PGO on to go with our uploaded PGO artifacts. This CL adds that
JSON file to our uploaded artifacts.

This file is expected to be <100 bytes, so no compression/etc. is done.

It can be packed in the profile tarball if people want, but it's easier
to work with (e.g. via `gsutil cat`) if it's not.

BUG=None
TEST=./run_tests and a pgo-generate tryjob

Change-Id: I6e8ae8b5ba08dc911892b50b73a8d8eeaec39929
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/1634798
Reviewed-by: Manoj Gupta <manojgupta@chromium.org>
Reviewed-by: Zhizhou Yang <zhizhouy@google.com>
Reviewed-by: David Burger <dburger@chromium.org>
Commit-Queue: George Burgess <gbiv@chromium.org>
Tested-by: George Burgess <gbiv@chromium.org>
diff --git a/cbuildbot/stages/artifact_stages.py b/cbuildbot/stages/artifact_stages.py
index 2484f5c..598fb03 100644
--- a/cbuildbot/stages/artifact_stages.py
+++ b/cbuildbot/stages/artifact_stages.py
@@ -12,6 +12,7 @@
 import itertools
 import json
 import multiprocessing
+import re
 import os
 import shutil
 
@@ -945,6 +946,7 @@
 
   category = constants.CI_INFRA_STAGE
   PROFDATA_TAR = 'llvm_profdata.tar.xz'
+  LLVM_METADATA = 'llvm_metadata.json'
   PROFDATA = 'llvm.profdata'
   SYS_DEVEL_DIR = 'var/tmp/portage/sys-devel'
 
@@ -953,6 +955,56 @@
     self._upload_queue = multiprocessing.Queue()
     self._merge_cmd = ''
 
+  @staticmethod
+  def _ParseUseFlagState(use_flags):
+    """Converts the textual output of equery to a +/- USE flag list.
+
+    After a large header, equery prints lines like ` + - use_flag : foo`. The
+    first +/- is the flag's default state and the second its state upon
+    installation; each result is that second sign plus the flag name.
+    """
+    # [ \t] rather than \s: \s matches newlines too, so a match could otherwise
+    # start on one line of output and finish on the next.
+    matcher = re.compile(r'^[ \t]+[+-][ \t]+([+-])[ \t]+(\S+)[ \t]+:', re.M)
+    return [state + name for state, name in matcher.findall(use_flags)]
+
+  @staticmethod
+  def _ParseLLVMHeadSHA(version_string):
+    """Returns LLVM's HEAD SHA from `clang --version`, or raises ValueError."""
+    # The first line of clang's version string looks something like:
+    # Chromium OS 9.0_pre353983_p20190325-r13 clang version 9.0.0 \
+    # (/var/cache/chromeos-cache/distfiles/host/egit-src/llvm-project \
+    # de7a0a152648d1a74cf4319920b1848aa00d1ca3) (based on LLVM 9.0.0svn)
+    # The 40-hex-digit SHA after llvm-project is the SHA we're looking for.
+    sha_re = re.compile(r'([A-Fa-f0-9]{40})\)\s+\(based on LLVM [\d+.]+svn\)$')
+    # Empty output has no first line; treat it as '' so we raise ValueError.
+    first_line = (version_string.splitlines() or [''])[0].strip()
+    match = sha_re.search(first_line)
+    if not match:
+      raise ValueError('Can\'t recognize the version string %r' % first_line)
+    return match.group(1)
+
+  def _CollectLLVMMetadata(self):
+    """Writes LLVM's HEAD SHA to a JSON file and queues it for upload."""
+    def check_chroot_output(command):
+      cmd = cros_build_lib.RunCommand(command, enter_chroot=True,
+                                      redirect_stdout=True)
+      return cmd.output
+    # The baked-in clang should be the one we're looking for. If not, yell.
+    llvm_uses = check_chroot_output(
+        ['equery', '-C', '-N', 'uses', 'sys-devel/llvm'])
+    use_vars = self._ParseUseFlagState(llvm_uses)
+    if '+llvm_pgo_generate' not in use_vars:
+      raise ValueError('The pgo_generate flag isn\'t enabled; USE flags: %r' %
+                       sorted(use_vars))
+
+    clang_version_str = check_chroot_output(['clang', '--version'])
+    head_sha = self._ParseLLVMHeadSHA(clang_version_str)
+    metadata_output_path = os.path.join(self.archive_path, self.LLVM_METADATA)
+    osutils.WriteFile(metadata_output_path, json.dumps({'head_sha': head_sha}))
+    # This is a tiny JSON file, so it doesn't need to be tarred/compressed.
+    self._upload_queue.put([metadata_output_path])
+
   def _CollectPGOProfiles(self):
     """Collect and upload PGO profiles for the board."""
     assert self.archive_path.startswith(self._build_root)
@@ -999,6 +1051,7 @@
   def PerformStage(self):
     with self.ArtifactUploader(self._upload_queue, archive=False):
       self._CollectPGOProfiles()
+      self._CollectLLVMMetadata()
 
 
 # This stage generates and uploads the orderfile files for Chrome build.
diff --git a/cbuildbot/stages/artifact_stages_unittest.py b/cbuildbot/stages/artifact_stages_unittest.py
index 4c8d906..0b5f8ef 100644
--- a/cbuildbot/stages/artifact_stages_unittest.py
+++ b/cbuildbot/stages/artifact_stages_unittest.py
@@ -7,6 +7,7 @@
 
 from __future__ import print_function
 
+import json
 import mock
 import os
 import sys
@@ -532,6 +533,16 @@
 
   RELEASE_TAG = ''
 
+  _VALID_CLANG_VERSION_SHA = 'de7a0a152648d1a74cf4319920b1848aa00d1ca3'
+  _VALID_CLANG_VERSION_STRING = (
+      'Chromium OS 9.0_pre353983_p20190325-r13 clang version 9.0.0 '
+      '(/var/cache/chromeos-cache/distfiles/host/egit-src/llvm-project '
+      'de7a0a152648d1a74cf4319920b1848aa00d1ca3) (based on LLVM 9.0.0svn)\n'
+      'Target: x86_64-pc-linux-gnu\n'
+      'Thread model: posix\n'
+      'InstalledDir: /usr/bin\n'
+  )
+
   # pylint: disable=protected-access
 
   def setUp(self):
@@ -545,6 +556,102 @@
     return artifact_stages.CollectPGOProfilesStage(self._run, self.buildstore,
                                                    self._current_board)
 
+  def testParseLLVMHeadSHA(self):
+    stage = self.ConstructStage()
+    actual_sha = stage._ParseLLVMHeadSHA(self._VALID_CLANG_VERSION_STRING)
+    self.assertEqual(actual_sha, self._VALID_CLANG_VERSION_SHA)
+
+  def _MetadataMultiDispatch(self, equery_uses_fn, clang_version_fn):
+    """Returns a fake RunCommand that serves canned equery/clang output."""
+    def result(command, enter_chroot, redirect_stdout):
+      self.assertTrue(enter_chroot)
+      self.assertTrue(redirect_stdout)
+
+      if command == ['equery', '-C', '-N', 'uses', 'sys-devel/llvm']:
+        stdout = equery_uses_fn()
+      elif command == ['clang', '--version']:
+        stdout = clang_version_fn()
+      else:
+        raise ValueError('Unexpected command: %s' % command)
+
+      return cros_build_lib.CommandResult(output=stdout)
+    return result
+
+  def testCollectLLVMMetadataRaisesOnAnInvalidVersionString(self):
+    """An unrecognized first line of `clang --version` raises ValueError."""
+    stage = self.ConstructStage()
+
+    def equery_uses():
+      return ' - + llvm_pgo_generate :'
+
+    def clang_version():
+      # Keep line endings so ''.join() yields well-formed multi-line output.
+      valid_version_lines = self._VALID_CLANG_VERSION_STRING.splitlines(True)
+      valid_version_lines[0] = 'clang version 8.0.1\n'
+      return ''.join(valid_version_lines)
+
+    with patch(cros_build_lib, 'RunCommand') as run_command:
+      run_command.side_effect = self._MetadataMultiDispatch(
+          equery_uses_fn=equery_uses,
+          clang_version_fn=clang_version)
+      with self.assertRaises(ValueError) as raised:
+        stage._CollectLLVMMetadata()
+      self.assertIn('version string', str(raised.exception))
+
+  def testCollectLLVMMetadataRaisesIfClangIsntPGOGenerated(self):
+    """A missing or disabled llvm_pgo_generate USE flag raises ValueError."""
+    stage = self.ConstructStage()
+
+    def clang_version():
+      return self._VALID_CLANG_VERSION_STRING
+
+    with patch(cros_build_lib, 'RunCommand') as run_command:
+      for uses in ['', ' - - llvm_pgo_generate :']:
+        def equery_uses():
+          # We're using a loop var on purpose; this function should die by the
+          # end of the current iteration.
+          # pylint: disable=cell-var-from-loop
+          return uses
+
+        run_command.side_effect = self._MetadataMultiDispatch(
+            equery_uses_fn=equery_uses,
+            clang_version_fn=clang_version)
+
+        with self.assertRaises(ValueError) as raised:
+          stage._CollectLLVMMetadata()
+        self.assertIn('pgo_generate flag', str(raised.exception))
+
+  def testCollectLLVMMetadataFunctionsInASimpleCase(self):
+    """Valid equery/clang output writes and enqueues {'head_sha': <SHA>}."""
+    def clang_version():
+      return self._VALID_CLANG_VERSION_STRING
+
+    def equery_uses():
+      return ' - + llvm_pgo_generate :'
+
+    stage = self.ConstructStage()
+    run_command = self.PatchObject(cros_build_lib, 'RunCommand')
+    run_command.side_effect = self._MetadataMultiDispatch(equery_uses,
+                                                          clang_version)
+    write_file = self.PatchObject(osutils, 'WriteFile')
+    upload_queue_put = self.PatchObject(stage._upload_queue, 'put')
+    stage._CollectLLVMMetadata()
+
+    # assert_called_once() is missing from older mock releases; count instead.
+    self.assertEqual(write_file.call_count, 1)
+    self.assertEqual(upload_queue_put.call_count, 1)
+
+    (written_file, metadata_json), kwargs = write_file.call_args
+    self.assertEqual(kwargs, {})
+
+    expected_metadata = {
+        'head_sha': self._VALID_CLANG_VERSION_SHA,
+    }
+    given_metadata = json.loads(metadata_json)
+    self.assertEqual(given_metadata, expected_metadata)
+
+    upload_queue_put.assert_called_with([written_file])
+
   def testCollectPGOProfiles(self):
     """Test that the sysroot generation was called correctly."""
     stage = self.ConstructStage()