crosperf: Fix BadChecksum failure on coral

Ignore "core id", "apicid", "initial apicid" fields from cpuinfo in
machine checksum calculation. The values may differ on the same type of
machines.

Add more descriptive output for the BadChecksum error.
Unittest is updated accordingly.

BUG=chromium:1145386
TEST=./run_tests.sh in crosperf.

Change-Id: Ifcc91fb70f02c41d77787fbb665741bc130152c0
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/2523398
Tested-by: Denis Nikitin <denik@chromium.org>
Reviewed-by: Bob Haarman <inglorion@chromium.org>
Commit-Queue: Denis Nikitin <denik@chromium.org>
diff --git a/crosperf/machine_manager.py b/crosperf/machine_manager.py
index 0b38eef..aaf09bf 100644
--- a/crosperf/machine_manager.py
+++ b/crosperf/machine_manager.py
@@ -141,7 +141,12 @@
 
   def _ComputeMachineChecksumString(self):
     self.checksum_string = ''
-    exclude_lines_list = ['MHz', 'BogoMIPS', 'bogomips']
+    # Some lines from cpuinfo have to be excluded because they are not
+    # persistent across DUTs.
+    # MHz, BogoMIPS are dynamically changing values.
+    # core id, apicid are identifiers assigned on startup
+    # and may differ on the same type of machine.
+    exclude_lines_list = ['MHz', 'BogoMIPS', 'bogomips', 'core id', 'apicid']
     for line in self.cpuinfo.splitlines():
       if not any(e in line for e in exclude_lines_list):
         self.checksum_string += line
@@ -220,8 +225,8 @@
     self.logger = lgr or logger.GetLogger()
 
     if self.locks_dir and not os.path.isdir(self.locks_dir):
-      raise MissingLocksDirectory(
-          'Cannot access locks directory: %s' % self.locks_dir)
+      raise MissingLocksDirectory('Cannot access locks directory: %s' %
+                                  self.locks_dir)
 
     self._initialized_machines = []
     self.chromeos_root = chromeos_root
@@ -242,8 +247,8 @@
     ret, version, _ = self.ce.CrosRunCommandWOutput(
         cmd, machine=machine.name, chromeos_root=self.chromeos_root)
     if ret != 0:
-      raise CrosCommandError(
-          "Couldn't get Chrome version from %s." % machine.name)
+      raise CrosCommandError("Couldn't get Chrome version from %s." %
+                             machine.name)
 
     if ret != 0:
       version = ''
@@ -312,20 +317,33 @@
     # Since this is used for cache lookups before the machines have been
     # compared/verified, check here to make sure they all have the same
     # checksum (otherwise the cache lookup may not be valid).
-    common_checksum = None
+    base = None
     for machine in self.GetMachines(label):
       # Make sure the machine's checksums are calculated.
       if not machine.machine_checksum:
         machine.SetUpChecksumInfo()
-      cs = machine.machine_checksum
-      # If this is the first machine we've examined, initialize
-      # common_checksum.
-      if not common_checksum:
-        common_checksum = cs
+      # Use the first machine as the basis for comparison.
+      if not base:
+        base = machine
       # Make sure this machine's checksum matches our 'common' checksum.
-      if cs != common_checksum:
-        raise BadChecksum('Machine checksums do not match!')
-    self.machine_checksum[label.name] = common_checksum
+      if base.machine_checksum != machine.machine_checksum:
+        # Found a difference. Fatal error.
+        # Extract non-matching part and report it.
+        for mismatch_index in range(len(base.checksum_string)):
+          if (mismatch_index >= len(machine.checksum_string) or
+              base.checksum_string[mismatch_index] !=
+              machine.checksum_string[mismatch_index]):
+            break
+        # We want to show some context after the mismatch.
+        end_ind = mismatch_index + 8
+        # Print a mismatching string.
+        raise BadChecksum(
+            'Machine checksums do not match!\n'
+            'Diff:\n'
+            f'{base.name}: {base.checksum_string[:end_ind]}\n'
+            f'{machine.name}: {machine.checksum_string[:end_ind]}\n'
+            '\nCheck for matching /proc/cpuinfo and /proc/meminfo on DUTs.\n')
+    self.machine_checksum[label.name] = base.machine_checksum
 
   def ComputeCommonCheckSumString(self, label):
     # The assumption is that this function is only called AFTER
@@ -369,8 +387,8 @@
 
       if self.log_level != 'verbose':
         self.logger.LogOutput('Setting up remote access to %s' % machine_name)
-        self.logger.LogOutput(
-            'Checking machine characteristics for %s' % machine_name)
+        self.logger.LogOutput('Checking machine characteristics for %s' %
+                              machine_name)
       cm = CrosMachine(machine_name, self.chromeos_root, self.log_level)
       if cm.machine_checksum:
         self._all_machines.append(cm)
@@ -410,8 +428,8 @@
 
       if self.acquire_timeout < 0:
         self.logger.LogFatal('Could not acquire any of the '
-                             "following machines: '%s'" % ', '.join(
-                                 machine.name for machine in machines))
+                             "following machines: '%s'" %
+                             ', '.join(machine.name for machine in machines))
 
 
 ###      for m in self._machines:
@@ -664,8 +682,8 @@
       for m in self._all_machines:
         assert m.name != machine_name, 'Tried to double-add %s' % machine_name
       cm = MockCrosMachine(machine_name, self.chromeos_root, self.log_level)
-      assert cm.machine_checksum, (
-          'Could not find checksum for machine %s' % machine_name)
+      assert cm.machine_checksum, ('Could not find checksum for machine %s' %
+                                   machine_name)
       # In Original MachineManager, the test is 'if cm.machine_checksum:' - if a
       # machine is unreachable, then its machine_checksum is None. Here we
       # cannot do this, because machine_checksum is always faked, so we directly
diff --git a/crosperf/machine_manager_unittest.py b/crosperf/machine_manager_unittest.py
index 26eacbd..f47cc88 100755
--- a/crosperf/machine_manager_unittest.py
+++ b/crosperf/machine_manager_unittest.py
@@ -44,8 +44,8 @@
         assert m.name != machine_name, 'Tried to double-add %s' % machine_name
       cm = machine_manager.MockCrosMachine(machine_name, self.chromeos_root,
                                            'average')
-      assert cm.machine_checksum, (
-          'Could not find checksum for machine %s' % machine_name)
+      assert cm.machine_checksum, ('Could not find checksum for machine %s' %
+                                   machine_name)
       self._all_machines.append(cm)
 
 
@@ -87,9 +87,10 @@
   def setUp(self, mock_isdir):
 
     mock_isdir.return_value = True
-    self.mm = machine_manager.MachineManager(
-        '/usr/local/chromeos', 0, 'average', None, self.mock_cmd_exec,
-        self.mock_logger)
+    self.mm = machine_manager.MachineManager('/usr/local/chromeos', 0,
+                                             'average', None,
+                                             self.mock_cmd_exec,
+                                             self.mock_logger)
 
     self.mock_lumpy1.name = 'lumpy1'
     self.mock_lumpy2.name = 'lumpy2'
@@ -225,15 +226,14 @@
     self.assertEqual(mock_sleep.call_count, 0)
 
   def test_compute_common_checksum(self):
-
     self.mm.machine_checksum = {}
     self.mm.ComputeCommonCheckSum(LABEL_LUMPY)
     self.assertEqual(self.mm.machine_checksum['lumpy'], 'lumpy123')
     self.assertEqual(len(self.mm.machine_checksum), 1)
 
     self.mm.machine_checksum = {}
-    self.assertRaises(machine_manager.BadChecksum,
-                      self.mm.ComputeCommonCheckSum, LABEL_MIX)
+    self.assertRaisesRegex(machine_manager.BadChecksum, r'daisy.*\n.*lumpy',
+                           self.mm.ComputeCommonCheckSum, LABEL_MIX)
 
   def test_compute_common_checksum_string(self):
     self.mm.machine_checksum_string = {}
@@ -583,8 +583,8 @@
 CHECKSUM_STRING = ('processor: 0vendor_id: GenuineIntelcpu family: 6model: '
                    '42model name: Intel(R) Celeron(R) CPU 867 @ '
                    '1.30GHzstepping: 7microcode: 0x25cache size: 2048 '
-                   'KBphysical id: 0siblings: 2core id: 0cpu cores: 2apicid: '
-                   '0initial apicid: 0fpu: yesfpu_exception: yescpuid level: '
+                   'KBphysical id: 0siblings: 2cpu cores: 2'
+                   'fpu: yesfpu_exception: yescpuid level: '
                    '13wp: yesflags: fpu vme de pse tsc msr pae mce cx8 apic sep'
                    ' mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse '
                    'sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc '
@@ -597,8 +597,8 @@
                    'bits virtualpower management:processor: 1vendor_id: '
                    'GenuineIntelcpu family: 6model: 42model name: Intel(R) '
                    'Celeron(R) CPU 867 @ 1.30GHzstepping: 7microcode: 0x25cache'
-                   ' size: 2048 KBphysical id: 0siblings: 2core id: 1cpu cores:'
-                   ' 2apicid: 2initial apicid: 2fpu: yesfpu_exception: yescpuid'
+                   ' size: 2048 KBphysical id: 0siblings: 2cpu cores:'
+                   ' 2fpu: yesfpu_exception: yescpuid'
                    ' level: 13wp: yesflags: fpu vme de pse tsc msr pae mce cx8 '
                    'apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx '
                    'fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm '