tradefed_test: reboot on failed login.

Reboot DUT when the first login attempt/Android start fails. This
should restore sanity when a DUT state goes bad, now that we try to
rarely reboot a device.

This change also makes login(), reboot() and restart() public, plus it
adds two functions to mark the state of Chrome object dirty (for reboot
or restart). Using these functions will allow us in future changes to
react better to more common test retry failures.

BUG=b:70549825
TEST=Run dram, stop ui while Chrome is starting. -> reboot + test pass.

Change-Id: Ib04b2567035b12de7eaec16b5c01b73a1f291e5c
Reviewed-on: https://chromium-review.googlesource.com/828523
Commit-Ready: Ilja H. Friedel <ihf@chromium.org>
Tested-by: Ilja H. Friedel <ihf@chromium.org>
Reviewed-by: Kazuhiro Inaba <kinaba@chromium.org>
(cherry picked from commit df7c93285fc3c5ce78979d0b086fcf9fd7065adb)
Reviewed-on: https://chromium-review.googlesource.com/830433
Reviewed-by: Ilja H. Friedel <ihf@chromium.org>
(cherry picked from commit f00498e71371c3dcc9bb450fdbbeffa59ff40531)
Reviewed-on: https://chromium-review.googlesource.com/830586
diff --git a/server/cros/tradefed_test.py b/server/cros/tradefed_test.py
index 17718b9..cc4b70c 100644
--- a/server/cros/tradefed_test.py
+++ b/server/cros/tradefed_test.py
@@ -92,9 +92,26 @@
 class _ChromeLogin(object):
     """Context manager to handle Chrome login state."""
 
+    def need_reboot(self):
+        """Marks state as "dirty" - reboot needed during/after test."""
+        logging.info('Will reboot DUT when Chrome stops.')
+        self._need_reboot = True
+
+    def need_restart(self):
+        """Marks state as "dirty" - restart needed after test."""
+        self._need_restart = True
+
     def __init__(self, host, kwargs):
         self._host = host
         self._cts_helper_kwargs = kwargs
+        # We will override reboot/restart options to some degree. Keep track
+        # of them in a local copy.
+        self._need_reboot = False
+        if kwargs.get('reboot'):
+            self.need_reboot()
+        self._need_restart = False
+        if kwargs.get('restart'):
+            self.need_restart()
 
     def _cmd_builder(self, verbose=False):
         """Gets remote command to start browser with ARC enabled."""
@@ -102,7 +119,7 @@
         # getting the autodir will raise an exception.
         cmd = autotest.Autotest.get_installed_autodir(self._host)
         cmd += '/bin/autologin.py --arc'
-        if self._cts_helper_kwargs.get('dont_override_profile') == True:
+        if self._cts_helper_kwargs.get('dont_override_profile'):
             logging.info('Using --dont_override_profile to start Chrome.')
             cmd += ' --dont_override_profile'
         else:
@@ -111,7 +128,7 @@
             cmd += ' > /dev/null 2>&1'
         return cmd
 
-    def _login(self, timeout=60, raise_exception=False):
+    def login(self, timeout=60, raise_exception=False, verbose=False):
         """Logs into Chrome."""
         try:
             # We used to call cheets_StartAndroid, but it is a little faster to
@@ -119,7 +136,7 @@
             self._host.run(
                 self._cmd_builder(),
                 ignore_status=False,
-                verbose=False,
+                verbose=verbose,
                 timeout=timeout)
             return True
         except autotest.AutodirNotFoundError:
@@ -136,9 +153,15 @@
                     **self._cts_helper_kwargs)
                 return True
             except:
+                # We were unable to start the browser/Android. Maybe we can
+                # salvage the DUT by rebooting. This can hide some failures.
+                self.reboot()
                 if raise_exception:
                     raise
         except:
+            # We were unable to start the browser/Android. Maybe we can
+            # salvage the DUT by rebooting. This can hide some failures.
+            self.reboot()
             if raise_exception:
                 raise
         return False
@@ -146,10 +169,10 @@
     def __enter__(self):
         """Logs into Chrome with retry."""
         logging.info('Ensure Android is running...')
-        if not self._login():
-            logging.info('Sleeping before next login attempt...')
-            time.sleep(20)
-            self._login(timeout=120, raise_exception=True)
+        if not self.login():
+            # The DUT reboots after unsuccessful login, try with more time again.
+            logging.info('Retrying failed login...')
+            self.login(timeout=120, raise_exception=True, verbose=True)
 
     def __exit__(self, exc_type, exc_value, traceback):
         """On exit restart the browser or reboot the machine.
@@ -163,23 +186,28 @@
         @return None, indicating not to ignore an exception from the with-block
                 if raised.
         """
-        reboot = True
-        if self._cts_helper_kwargs.get('reboot') != True:
+        if not self._need_reboot:
             logging.info('Skipping reboot, restarting browser.')
-            reboot = False
             try:
-                # We clean up /tmp (which is memory backed) from crashes and
-                # other files. A reboot would have cleaned /tmp as well.
-                script = 'stop ui && find /tmp/ -mindepth 1 -delete && start ui'
-                self._host.run(
-                    script, ignore_status=False, verbose=False, timeout=120)
+                self.restart()
             except:
                 logging.error('Restarting browser has failed.')
-                reboot = True
-        if reboot:
-            self._reboot(exc_type, exc_value, traceback)
+                self.need_reboot()
+        if self._need_reboot:
+            self.reboot(exc_type, exc_value, traceback)
 
-    def _reboot(self, exc_type, exc_value, traceback):
+    def restart(self):
+        # We clean up /tmp (which is memory backed) from crashes and
+        # other files. A reboot would have cleaned /tmp as well.
+        # TODO(ihf): Remove "start ui" which is a nicety to non-ARC tests (i.e.
+        # now we wait on login screen, but login() above will 'stop ui' again
+        # before launching Chrome with ARC enabled).
+        script = 'stop ui'
+        script += '&& find /tmp/ -mindepth 1 -delete '
+        script += '&& start ui'
+        self._host.run(script, ignore_status=False, verbose=False, timeout=120)
+
+    def reboot(self, exc_type=None, exc_value=None, traceback=None):
         """Reboot the machine.
 
         @param exc_type: Exception type if an exception is raised from the
@@ -194,6 +222,7 @@
         logging.info('Rebooting...')
         try:
             self._host.reboot()
+            self._need_reboot = False
         except Exception:
             if exc_type is None:
                 raise