Forced AU at OOBE with interruptions.

This test interrupts the forced update at OOBE by:
1. Rebooting the device.
2. Pulling the network for a couple of minutes.
3. Suspending and resuming the device.

We want to ensure the device can continue with the update no matter what
end users do to the DUT.

The test team manually performs these steps during every new device FSI.
This test will remove the need for any further manual testing of forced
updates at OOBE.

BUG=chromium:810153
TEST=autoupdate_ForcedOOBEUpdate.* passing

Change-Id: I8e2a6c1ae8556f3afbcef4510e93fd87f2465ba4
Reviewed-on: https://chromium-review.googlesource.com/917651
Commit-Ready: David Haddock <dhaddock@chromium.org>
Tested-by: David Haddock <dhaddock@chromium.org>
Reviewed-by: Katherine Threlkeld <kathrelkeld@chromium.org>
diff --git a/client/site_tests/autoupdate_DisconnectReconnectNetwork/autoupdate_DisconnectReconnectNetwork.py b/client/site_tests/autoupdate_DisconnectReconnectNetwork/autoupdate_DisconnectReconnectNetwork.py
new file mode 100644
index 0000000..e463dd2
--- /dev/null
+++ b/client/site_tests/autoupdate_DisconnectReconnectNetwork/autoupdate_DisconnectReconnectNetwork.py
@@ -0,0 +1,79 @@
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+import logging
+import shutil
+import time
+import urlparse
+
+from autotest_lib.client.bin import test
+from autotest_lib.client.common_lib import error
+from autotest_lib.client.common_lib import utils
+
+class autoupdate_DisconnectReconnectNetwork(test.test):
+    """
+    Tests removing the network for a couple of minutes.
+
+    This test will be used in conjunction with
+    autoupdate_ForcedOOBEUpdate.interrupt.full.
+    """
+    version = 1
+
+    def cleanup(self):
+        """Saves the update_engine log and brings the network back up."""
+        try:
+            shutil.copy('/var/log/update_engine.log', self.resultsdir)
+        finally:
+            # Always turn the adapters back on, even if saving the log failed.
+            utils.run('ifconfig eth0 up', ignore_status=True)
+            utils.run('ifconfig eth1 up', ignore_status=True)
+            utils.start_service('recover_duts', ignore_status=True)
+
+        # We can't return right after reconnecting the network or the server
+        # test may not receive the message. So we wait a bit longer for the
+        # DUT to be reconnected.
+        utils.poll_for_condition(lambda: utils.ping(self._update_server,
+                                                    deadline=5, timeout=5) == 0,
+                                 timeout=60, sleep_interval=1)
+        logging.info('Online ready to return to server test')
+
+
+    def run_once(self, update_url, time_without_network=120):
+        """
+        Takes the DUT offline and checks that the update makes no progress.
+
+        @param update_url: Update URL; its hostname is pinged to check state.
+        @param time_without_network: Approximate seconds to stay offline.
+        """
+        self._update_server = urlparse.urlparse(update_url).hostname
+        # DUTs in the lab have a service called recover_duts that is used to
+        # check that the DUT is online and if it is not it will bring it back
+        # online. We will need to stop this service for the length of this test.
+        utils.stop_service('recover_duts', ignore_status=True)
+
+        # Disable the network adapters.
+        utils.run('ifconfig eth0 down')
+        utils.run('ifconfig eth1 down')
+
+        # Check that we are offline.
+        if utils.ping(self._update_server, deadline=5, timeout=5) != 2:
+            raise error.TestFail('Ping succeeded even though we were offline.')
+
+        # Get the update percentage as the network is down
+        percent_before = utils.run('update_engine_client --status').stdout
+        percent_before = percent_before.splitlines()[1].partition('=')[2]
+        for _ in range(1, time_without_network):
+            logging.info(utils.run('update_engine_client --status').stdout)
+            time.sleep(1)
+        percent_after = utils.run('update_engine_client --status').stdout
+        percent_after = percent_after.splitlines()[1].partition('=')[2]
+        # Compare numerically; the parsed values are strings.
+        if float(percent_before) < float(percent_after):
+            raise error.TestFail('The update continued while the network '
+                                 'was supposedly disabled. Before: '
+                                 '%s, After: %s' % (percent_before,
+                                                    percent_after))
+        if float(percent_before) > float(percent_after):
+            raise error.TestFail('The update appears to have restarted. '
+                                 'Before: %s, After: %s' % (percent_before,
+                                                            percent_after))
\ No newline at end of file
diff --git a/client/site_tests/autoupdate_DisconnectReconnectNetwork/control b/client/site_tests/autoupdate_DisconnectReconnectNetwork/control
new file mode 100644
index 0000000..6aa4e06
--- /dev/null
+++ b/client/site_tests/autoupdate_DisconnectReconnectNetwork/control
@@ -0,0 +1,24 @@
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+AUTHOR = "dhaddock, Chromium OS"
+NAME = "autoupdate_DisconnectReconnectNetwork"
+TEST_CATEGORY = "Functional"
+TEST_CLASS = "platform"
+TEST_TYPE = "client"
+PURPOSE = "Kill the network, ensure update engine doesn't continue, reconnect."
+TIME = "SHORT"
+
+DOC = """
+This test takes the DUT's network adapters down, checks that update_engine
+makes no progress while offline, then brings them back up so it can continue.
+
+The length of time the test waits can be configured by passing in a
+time_without_network=X variable. X is defined in seconds.
+
+This test will not be run standalone. It will be kicked off from a server
+side test.
+"""
+
+job.run_test('autoupdate_DisconnectReconnectNetwork')
diff --git a/server/site_tests/autoupdate_ForcedOOBEUpdate/autoupdate_ForcedOOBEUpdate.py b/server/site_tests/autoupdate_ForcedOOBEUpdate/autoupdate_ForcedOOBEUpdate.py
index f5921d0..02b978d 100644
--- a/server/site_tests/autoupdate_ForcedOOBEUpdate/autoupdate_ForcedOOBEUpdate.py
+++ b/server/site_tests/autoupdate_ForcedOOBEUpdate/autoupdate_ForcedOOBEUpdate.py
@@ -4,6 +4,7 @@
 import json
 import logging
 import os
+import random
 import time
 
 from autotest_lib.client.common_lib import error
@@ -61,13 +62,52 @@
         rootfs_hostlog = os.path.join(self.resultsdir, 'hostlog_rootfs')
         reboot_hostlog = os.path.join(self.resultsdir, 'hostlog_reboot')
 
-        with open(rootfs_hostlog, 'w') as outfile:
-            json.dump(hostlog[:self._ROOTFS_HOSTLOG_EVENTS], outfile)
+        # Each time we reboot in the middle of an update we ping omaha again
+        # for each update event. So parse the list backwards to get the final
+        # events.
         with open(reboot_hostlog, 'w') as outfile:
-            json.dump(hostlog[self._ROOTFS_HOSTLOG_EVENTS:], outfile)
+            json.dump(hostlog[-1:], outfile)
+        with open(rootfs_hostlog, 'w') as outfile:
+            json.dump(hostlog[len(hostlog)-1-self._ROOTFS_HOSTLOG_EVENTS:-1],
+                      outfile)
+
         return rootfs_hostlog, reboot_hostlog
 
 
+    def _get_update_percentage(self):
+        """Polls update_engine until downloading; returns progress as float."""
+        while True:
+            result = self._host.run('update_engine_client --status',
+                                    ignore_timeout=True, timeout=10)
+            if not result:
+                # No result (presumably the command timed out); ask again.
+                continue
+            lines = result.stdout.splitlines()
+            logging.debug(lines)
+            current_op = lines[2]
+            if 'UPDATE_STATUS_IDLE' in current_op:
+                raise error.TestFail('Update status was idle while trying to '
+                                     'get download status.')
+            if 'UPDATE_STATUS_DOWNLOADING' in current_op:
+                return float(lines[1].partition('=')[2])
+            # Right after a reboot it may not be downloading yet; retry.
+            time.sleep(1)
+
+
+    def _update_continued_where_it_left_off(self, percentage):
+        """
+        Reports whether download progress is at least where it previously was.
+
+        @param percentage: Progress recorded before the interruption.
+
+        @returns True if progress is >= percentage, i.e. the update resumed;
+                 False if progress dropped, i.e. the update restarted.
+        """
+        current = self._get_update_percentage()
+        logging.info('New value: %f, old value: %f', current, percentage)
+        return percentage <= current
+
+
     def _wait_for_update_to_complete(self):
         """Wait for the update that started to complete.
 
@@ -83,13 +123,44 @@
             if status is not None:
                 status = status.stdout.splitlines()
                 logging.debug(status)
-                if "UPDATE_STATUS_IDLE" in status[2]:
+                if 'UPDATE_STATUS_IDLE' in status[2]:
                     break
             time.sleep(1)
 
 
+    def _wait_for_percentage(self, percent):
+        """
+        Blocks until the download progress is strictly above a threshold.
+
+        @param percent: Progress value that must be exceeded before returning.
+        """
+        while True:
+            progress = self._get_update_percentage()
+            logging.debug('Checking if %s is greater than %s', progress,
+                          percent)
+            if progress > percent:
+                return
+            time.sleep(1)
+
+
     def run_once(self, host, full_payload=True, cellular=False,
-                 job_repo_url=None):
+                 interrupt=False, max_updates=1, job_repo_url=None):
+        """
+        Runs a forced autoupdate during ChromeOS OOBE.
+
+        @param host: The DUT that we are running on.
+        @param full_payload: True for a full payload. False for delta.
+        @param cellular: True to do the update over a cellular connection.
+                         Requires that the DUT have a sim card slot.
+        @param interrupt: True to interrupt the update in the middle.
+        @param max_updates: Used to tell the test how many times it is
+                            expected to ping its omaha server.
+        @param job_repo_url: Used for debugging locally. This is used to figure
+                             out the current build and the devserver to use.
+                             The test will read this from a host argument
+                             when run in the lab.
+
+        """
         self._host = host
 
         # veyron_rialto is a medical device with a different OOBE that auto
@@ -100,7 +171,8 @@
         update_url = self.get_update_url_for_test(job_repo_url,
                                                   full_payload=full_payload,
                                                   critical_update=True,
-                                                  cellular=cellular)
+                                                  cellular=cellular,
+                                                  max_updates=max_updates)
         logging.info('Update url: %s', update_url)
         before = self._get_chromeos_version()
         payload_info = None
@@ -125,6 +197,39 @@
         client_at._check_client_test_result(self._host,
                                             'autoupdate_StartOOBEUpdate')
 
+        if interrupt:
+            # Choose a random downloaded percentage to interrupt the update.
+            percent = random.uniform(0.1, 0.8)
+            logging.debug('Percent when we will interrupt: %f', percent)
+            self._wait_for_percentage(percent)
+            logging.info('We will start interrupting the update.')
+            completed = self._get_update_percentage()
+
+            # Reboot the DUT during the update.
+            self._host.reboot()
+            if not self._update_continued_where_it_left_off(completed):
+                raise error.TestFail('The update did not continue where it '
+                                     'left off before rebooting.')
+            completed = self._get_update_percentage()
+
+            # Disconnect and reconnect network.
+            reconnect_test_name = 'autoupdate_DisconnectReconnectNetwork'
+            client_at.run_test(reconnect_test_name, update_url=update_url)
+            client_at._check_client_test_result(self._host, reconnect_test_name)
+            if not self._update_continued_where_it_left_off(completed):
+                raise error.TestFail('The update did not continue where it '
+                                     'left off before disconnecting network.')
+
+            # Suspend / Resume
+            boot_id = self._host.get_boot_id()
+            self._host.servo.lid_close()
+            self._host.test_wait_for_sleep()
+            self._host.servo.lid_open()
+            self._host.test_wait_for_boot(boot_id)
+            if not self._update_continued_where_it_left_off(completed):
+                raise error.TestFail('The update did not continue where it '
+                                     'left off after suspend/resume.')
+
         self._wait_for_update_to_complete()
 
         if cellular:
diff --git a/server/site_tests/autoupdate_ForcedOOBEUpdate/control.interrupt.full b/server/site_tests/autoupdate_ForcedOOBEUpdate/control.interrupt.full
new file mode 100644
index 0000000..f0c450f
--- /dev/null
+++ b/server/site_tests/autoupdate_ForcedOOBEUpdate/control.interrupt.full
@@ -0,0 +1,34 @@
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+AUTHOR = "dhaddock, Chromium OS"
+NAME = "autoupdate_ForcedOOBEUpdate.interrupt.full"
+PURPOSE = "Test forced update at OOBE with interruptions."
+TIME = "MEDIUM"
+TEST_CATEGORY = "Functional"
+TEST_CLASS = "platform"
+TEST_TYPE = "server"
+DEPENDENCIES = "servo, use_lid"
+ATTRIBUTES = "suite:bvt-perbuild"
+DOC = """
+This tests the forced autoupdate flow at OOBE with interruptions.
+
+During the update it will
+1. Reboot
+2. Disconnect the network for a couple of minutes (120 seconds by default).
+3. Suspend and resume the DUT by closing and reopening the lid with servo.
+
+"""
+
+from autotest_lib.client.common_lib import utils
+
+args_dict = utils.args_to_dict(args)
+servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
+
+def run(machine):  # Handed to job.parallel_simple() below.
+    host = hosts.create_host(machine, servo_args=servo_args)
+    job.run_test('autoupdate_ForcedOOBEUpdate', host=host, full_payload=True,
+                 interrupt=True, max_updates=3, **args_dict)
+
+job.parallel_simple(run, machines)