Better handling of "not identical machines" failure.

Nightly tests, especially on the x86-generic boxes, fail frequently
because the DUTs have different board images on them, so Crosperf
decides the machines are not identical and refuses to run the tests.
With this CL, if the machine_manager finds that the machines fail the
"identical" test, it will try to push the same image onto all the machines,
and then check them again to see if they are the same.  It only tries this
once; if they fail the check the second time around, it is still a fatal
failure.  This should eliminate many of the unnecessary failures in our
nightly tests.

This CL also fixes a small bug in the auto-delete script:
options.days_to_preserve is now converted with int() before it is used
in the date arithmetic.  (This fix has been running for a while on
mobiletc-prebuild, but never got committed.)

BUG=None
TEST=Ran several iterations where I forced the first "identical" check to
fail.  The changes worked.

Change-Id: Ied2a55e5d3e2789e58a503aef03269888954b579
Reviewed-on: https://chrome-internal-review.googlesource.com/163334
Reviewed-by: Luis Lozano <llozano@chromium.org>
Commit-Queue: Caroline Tice <cmtice@google.com>
Tested-by: Caroline Tice <cmtice@google.com>
diff --git a/auto_delete_nightly_test_data.py b/auto_delete_nightly_test_data.py
index faebb5b..fc0e259 100755
--- a/auto_delete_nightly_test_data.py
+++ b/auto_delete_nightly_test_data.py
@@ -91,7 +91,7 @@
   # We go back 1 week, delete from that day till we are
   # options.days_to_preserve away from today.
   s = d - 7
-  e = d - options.days_to_preserve
+  e = d - int(options.days_to_preserve)
   rv = 0
   for i in range(s + 1, e):
     if i <= 0:
diff --git a/crosperf/benchmark_run.py b/crosperf/benchmark_run.py
index ec39b40..e4fe693 100644
--- a/crosperf/benchmark_run.py
+++ b/crosperf/benchmark_run.py
@@ -13,6 +13,7 @@
 from utils import command_executer
 from utils import timeline
 
+from machine_manager import NonMatchingMachines
 from suite_runner import SuiteRunner
 from results_cache import MockResult
 from results_cache import MockResultsCache
@@ -141,10 +142,19 @@
 
   def AcquireMachine(self):
     while True:
+      machine = None
       if self.terminated:
         raise Exception("Thread terminated while trying to acquire machine.")
-      machine = self.machine_manager.AcquireMachine(self.label.chromeos_image,
-                                                    self.label)
+      try:
+        machine = self.machine_manager.AcquireMachine(self.label.chromeos_image,
+                                                      self.label,
+                                                      throw=True)
+
+      except NonMatchingMachines:
+        self.machine_manager.ForceSameImageToAllMachines(self.label)
+        machine = self.machine_manager.AcquireMachine(self.label.chromeos_image,
+                                                      self.label,
+                                                      throw=False)
 
       if machine:
         self._logger.LogOutput("%s: Machine %s acquired at %s" %
diff --git a/crosperf/machine_manager.py b/crosperf/machine_manager.py
index 52c3d81..04a4eec 100644
--- a/crosperf/machine_manager.py
+++ b/crosperf/machine_manager.py
@@ -22,6 +22,8 @@
 
 CHECKSUM_FILE = "/usr/local/osimage_checksum_file"
 
+class NonMatchingMachines(Exception):
+  pass
 
 class CrosMachine(object):
   def __init__(self, name, chromeos_root, log_level):
@@ -33,6 +35,9 @@
     self.test_run = None
     self.chromeos_root = chromeos_root
     self.log_level = log_level
+    self.SetUpChecksumInfo()
+
+  def SetUpChecksumInfo(self):
     if not self.IsReachable():
       self.machine_checksum = None
       return
@@ -288,6 +293,7 @@
     checksums = [m.machine_checksum for m in self.GetMachines(label)]
     return len(set(checksums)) == 1
 
+
   def RemoveMachine(self, machine_name):
     with self._lock:
       self._machines = [m for m in self._machines
@@ -297,7 +303,14 @@
         logger.GetLogger().LogError("Could not unlock machine: '%s'."
                                     % m.name)
 
-  def AcquireMachine(self, chromeos_image, label):
+  def ForceSameImageToAllMachines(self, label):
+    machines = self.GetMachines(label)
+    chromeos_image = label.chromeos_image
+    for m in machines:
+      self.ImageMachine(m, label)
+      m.SetUpChecksumInfo()
+
+  def AcquireMachine(self, chromeos_image, label, throw=False):
     if label.image_type == "local":
       image_checksum = ImageChecksummer().Checksum(label, self.log_level)
     elif label.image_type == "trybot":
@@ -315,7 +328,13 @@
           if new_machine:
             m.released_time = time.time()
         if not self.AreAllMachineSame(label):
-          logger.GetLogger().LogFatal("-- not all the machine are identical")
+          if not throw:
+            # Log fatal message, which calls sys.exit.  Default behavior.
+            logger.GetLogger().LogFatal("-- not all the machines are identical")
+          else:
+            # Raise an exception, which can be caught and handled by calling
+            # function.
+            raise NonMatchingMachines("Not all the machines are identical")
         if self.GetAvailableMachines(label):
           break
         else: