Revert the revert and fix Port FindSymbolFiles from scripts/upload_symbols.py.

This reverts the revert and fixes the post-submit test failure, which
was caused by the original version of test_GatherSymbolFiles. The
error was that the output directory was in the source directory,
which could cause a race condition failure where output files were
processed as source files.

Besides correcting the test, the new method has an assertion based on
the list of SymbolFileTuple fields rather than size so that on error
the full result mismatch is displayed, enabling more insight on
failures.

This reverts commit e879dee9b4416c638125874da3fd7279a6f6465e.

BUG=chromium:1031380
TEST=pytest

Change-Id: Ic055e2c5b13432fa28de427409f2e90b23ae5e01
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/2551591
Reviewed-by: Stephane Belmon <sbelmon@google.com>
Reviewed-by: Alex Klein <saklein@chromium.org>
Tested-by: Michael Mortensen <mmortensen@google.com>
diff --git a/service/artifacts.py b/service/artifacts.py
index 3fa5847..8b6faee 100644
--- a/service/artifacts.py
+++ b/service/artifacts.py
@@ -16,8 +16,13 @@
 import json
 import os
 import shutil
+import tempfile
+
+from typing import List
+from six.moves import urllib
 
 from chromite.lib import autotest_util
+from chromite.lib import cache
 from chromite.lib import constants
 from chromite.lib import cros_build_lib
 from chromite.lib import cros_logging as logging
@@ -695,7 +700,6 @@
 
   return CpeResult(report=report_path, warnings=warnings_path)
 
-
 def BundleGceTarball(output_dir, image_dir):
   """Bundle the test image into a tarball suitable for importing into GCE.
 
@@ -718,3 +722,95 @@
         inputs=('disk.raw',), extra_args=['--dereference', '--format=oldgnu'])
 
   return tarball
+
+
+# A SymbolFileTuple is a data object that contains:
+#  display_path (str): Relative path to the file based on initial search path.
+#  file_name (str): Full path to where the SymbolFile was found.
+# For example, if the search path for symbol files is '/some/bot/path/'
+# and a symbol file is found at '/some/bot/path/a/b/c/file1.sym',
+# then the display_path would be 'a/b/c/file1.sym' and the file_name
+# would be '/some/bot/path/a/b/c/file1.sym'.
+SymbolFileTuple = collections.namedtuple(
+    'SymbolFileTuple', ['display_path', 'file_name'])
+
+def IsTarball(path: str) -> bool:
+  """Guess if this is a tarball based on the filename."""
+  parts = path.split('.')
+  if len(parts) <= 1:
+    return False
+
+  if parts[-1] == 'tar':
+    return True
+
+  if parts[-2] == 'tar':
+    return parts[-1] in ('bz2', 'gz', 'xz')
+
+  return parts[-1] in ('tbz2', 'tbz', 'tgz', 'txz')
+
+
+def GatherSymbolFiles(tempdir:str, destdir:str,
+                    paths: List[str]) -> List[SymbolFileTuple]:
+  """Locate symbol files in |paths|
+
+  This generator function searches all paths for .sym files and copies them to
+  destdir. A path to a tarball will result in the tarball being unpacked and
+  examined. A path to a directory will result in the directory being searched
+  for .sym files. The generator yields SymbolFileTuple objects that contain
+  symbol file references which are valid after this exits. Those files may exist
+  externally, or be created in the tempdir (when expanding tarballs).
+  Note: the caller must clean up the tempdir.
+  Note: this function is recursive for tar files.
+
+  Args:
+    tempdir: Path to use for temporary files.
+    destdir: All .sym files are copied to this path.
+    paths: A list of input paths to walk. Files are returned based on .sym name
+      w/out any checking internal symbol file format.
+      Dirs are searched for files that end in ".sym". Urls are not handled.
+      Tarballs are unpacked and walked.
+
+  Yields:
+    A SymbolFileTuple for every symbol file found in paths.
+  """
+  logging.info('GatherSymbolFiles tempdir %s destdir %s paths %s',
+               tempdir, destdir, paths)
+  for p in paths:
+    o = urllib.parse.urlparse(p)
+    if o.scheme:
+      raise NotImplementedError('URL paths are not expected/handled: ',p)
+
+    elif os.path.isdir(p):
+      for root, _, files in os.walk(p):
+        for f in files:
+          if f.endswith('.sym'):
+            # If p is '/tmp/foo' and filename is '/tmp/foo/bar/bar.sym',
+            # display_path = 'bar/bar.sym'
+            filename = os.path.join(root, f)
+            display_path = filename[len(p):].lstrip('/')
+            try:
+              # TODO(crbug.com/1031380): Put calls to shutil.copy in a function
+              # that handles collisions in destdir due to different paths
+              # to the same filename (foo/a/a.sym from path foo, bar/a/a.sym
+              # from path bar) since the last shutil.copy will overwrite
+              # previous one.
+              shutil.copy(filename, os.path.join(destdir, display_path))
+            except IOError:
+              # Handles pre-3.3 Python where we may need to make the target
+              # path's dirname before copying.
+              os.makedirs(os.path.join(destdir, os.path.dirname(display_path)))
+              shutil.copy(filename, os.path.join(destdir, display_path))
+            yield SymbolFileTuple(display_path=display_path,
+                             file_name=filename)
+
+    elif IsTarball(p):
+      tardir = tempfile.mkdtemp(dir=tempdir)
+      cache.Untar(os.path.realpath(p), tardir)
+      for sym in GatherSymbolFiles(tardir, destdir, [tardir]):
+        yield sym
+
+    else:
+      # Path p is a file.
+      # TODO(crbug.com/1031380): Before copying verify that p ends with .sym.
+      shutil.copy(p, destdir)
+      yield SymbolFileTuple(display_path=p, file_name=p)
diff --git a/service/artifacts_unittest.py b/service/artifacts_unittest.py
index 7a834ab..f82561a 100644
--- a/service/artifacts_unittest.py
+++ b/service/artifacts_unittest.py
@@ -7,6 +7,8 @@
 
 from __future__ import print_function
 
+from operator import attrgetter
+
 import json
 import os
 import shutil
@@ -739,3 +741,143 @@
     # Verify the symlink points the the test image.
     disk_raw = os.path.join(call_tempdir, 'disk.raw')
     self.assertEqual(os.readlink(disk_raw), self.image_file)
+
+
+class TestGatherSymbolFiles(cros_test_lib.MockTempDirTestCase):
+  """Base class for testing GatherSymbolFiles."""
+
+  SLIM_CONTENT = """
+some junk
+"""
+
+  FAT_CONTENT = """
+STACK CFI 1234
+some junk
+STACK CFI 1234
+"""
+
+
+  def createSymbolFile(self, filename, content=FAT_CONTENT, size=0):
+    """Create a symbol file using content with minimum size."""
+    osutils.SafeMakedirs(os.path.dirname(filename))
+
+    # If a file size is given, force that to be the minimum file size. Create
+    # a sparse file so large files are practical.
+    with open(filename, 'w+b') as f:
+      f.truncate(size)
+      f.seek(0)
+      f.write(content.encode('utf-8'))
+
+  def test_GatherSymbolFiles(self):
+    """Test that files are found and copied."""
+    # Create directory with some symbol files.
+    tar_tmp_dir = os.path.join(self.tempdir, 'tar_tmp')
+    output_dir = os.path.join(self.tempdir, 'output')
+    input_dir = os.path.join(self.tempdir, 'input')
+    osutils.SafeMakedirs(output_dir)
+    self.createSymbolFile(os.path.join(input_dir, 'a/b/c/file1.sym'))
+    self.createSymbolFile(os.path.join(input_dir, 'a/b/c/d/file2.sym'))
+    self.createSymbolFile(os.path.join(input_dir, 'a/file3.sym'))
+    self.createSymbolFile(os.path.join(input_dir, 'a/b/c/d/e/file1.sym'))
+
+    # Call artifacts.GatherSymbolFiles to find symbol files under self.tempdir
+    # and copy them to output_dir.
+    symbol_files = list(artifacts.GatherSymbolFiles(
+        tar_tmp_dir, output_dir, [input_dir]))
+
+    # Construct the expected symbol files. Note that the SymbolFileTuple
+    # field file_name is the full path to where a symbol file was found,
+    # while display_path is the relative path (from the search) where
+    # it is created in the output directory.
+    expected_symbol_files = [
+        artifacts.SymbolFileTuple(
+            file_name=os.path.join(input_dir, 'a/b/c/file1.sym'),
+            display_path='a/b/c/file1.sym'),
+        artifacts.SymbolFileTuple(
+            file_name=os.path.join(input_dir, 'a/b/c/d/file2.sym'),
+            display_path='a/b/c/d/file2.sym'),
+        artifacts.SymbolFileTuple(
+            file_name=os.path.join(input_dir, 'a/file3.sym'),
+            display_path='a/file3.sym'),
+        artifacts.SymbolFileTuple(
+            file_name=os.path.join(input_dir, 'a/b/c/d/e/file1.sym'),
+            display_path='a/b/c/d/e/file1.sym')
+    ]
+
+    # Sort symbol_files and expected_output_files by the display_path attribute.
+    symbol_files = sorted(symbol_files, key=attrgetter('display_path'))
+    expected_symbol_files = sorted(expected_symbol_files,
+                                   key=attrgetter('display_path'))
+    # Compare the files to the expected files. This verifies the size and
+    # contents, and on failure shows the full contents.
+    self.assertEqual(symbol_files, expected_symbol_files)
+
+    # Verify that the files in output_dir match the SymbolFile display_paths.
+    files_in_output_dir = self.getFilesWithRelativeDir(output_dir)
+    files_in_output_dir.sort()
+    symbol_file_display_paths = [obj.display_path for obj in symbol_files]
+    symbol_file_display_paths.sort()
+    self.assertEqual(files_in_output_dir, symbol_file_display_paths)
+
+    # Verify that the display_name of each symbol does not contain pathsep.
+    symbol_file_display_names = [
+        os.path.basename(obj.display_path) for obj in symbol_files
+    ]
+    for display_name in symbol_file_display_names:
+      self.assertEqual(-1, display_name.find(os.path.sep))
+
+  def test_FindSymbolTarFiles(self):
+    """Test that symbol files in tar files are extracted."""
+    output_dir = os.path.join(self.tempdir, 'output')
+    osutils.SafeMakedirs(output_dir)
+
+    # Set up test input directory.
+    tarball_dir = os.path.join(self.tempdir, 'some/path')
+    files_in_tarball = ['dir1/fileZ.sym', 'dir2/fileY.sym', 'dir2/fileX.sym',
+                        'fileA.sym', 'fileB.sym', 'fileC.sym']
+    for filename in files_in_tarball:
+      self.createSymbolFile(os.path.join(tarball_dir, filename))
+    temp_tarball_file_path = os.path.join(self.tempdir, 'symfiles.tar')
+    cros_build_lib.CreateTarball(temp_tarball_file_path, tarball_dir)
+    # Now that we've created the tarball, remove the .sym files in
+    # the tarball dir and move the tarball to that dir.
+    for filename in files_in_tarball:
+      os.remove(os.path.join(tarball_dir, filename))
+    tarball_path = os.path.join(tarball_dir, 'symfiles.tar')
+    shutil.move(temp_tarball_file_path, tarball_path)
+
+    # Execute artifacts.GatherSymbolFiles where the path contains the tarball.
+    symbol_files = list(artifacts.GatherSymbolFiles(
+        tarball_dir, output_dir, [tarball_path]))
+
+    self.assertEqual(len(symbol_files), 6)
+    symbol_file_display_names = [
+        os.path.basename(obj.display_path) for obj in symbol_files
+    ]
+    symbol_file_display_names.sort()
+    self.assertEqual(symbol_file_display_names,
+                     ['fileA.sym', 'fileB.sym', 'fileC.sym',
+                      'fileX.sym', 'fileY.sym', 'fileZ.sym'])
+
+    # Verify that the files in output_dir match the SymbolFile display_paths.
+    files_in_output_dir = self.getFilesWithRelativeDir(output_dir)
+    files_in_output_dir.sort()
+    symbol_file_display_paths = [obj.display_path for obj in symbol_files]
+    symbol_file_display_paths.sort()
+    self.assertEqual(files_in_output_dir, symbol_file_display_paths)
+    # Verify that the display_name of each symbol does not contain pathsep.
+    symbol_file_display_names = [
+        os.path.basename(obj.display_path) for obj in symbol_files
+    ]
+    for display_name in symbol_file_display_names:
+      self.assertEqual(-1, display_name.find(os.path.sep))
+
+  def getFilesWithRelativeDir(self, dest_dir):
+    """Find all files below dest_dir using dir relative to dest_dir."""
+    relative_files = []
+    for path, __, files in os.walk(dest_dir):
+      for filename in files:
+        fullpath = os.path.join(path, filename)
+        relpath = os.path.relpath(fullpath, dest_dir)
+        relative_files.append(relpath)
+    return relative_files