Refactor the way we track the devserver in vm tests and kill it harder.

This CL re-writes most of the devserver_wrapper to be better code. I haven't
touched any of the non-file / non-watching devserver components, but I
refactored the rest.

Major changes:
1) Use a pid file to track the devserver and only kill that devserver with
increasingly painful signals.
2) Have Start() only return once the devserver is started.
3) Make the default paths work correctly. Chroot/outside chroot path business
is not handled very well by all calling scripts and most scripts don't care
as long as they can print the log. Take advantage of that.
4) Make the devserver_wrapper_stress_test conform with our code style.

BUG=chromium:251309
TEST=Ran au_test_harness + devinstall_test +
devserver_wrapper_stress_test.
CQ-DEPEND=CL:66548

Change-Id: I28a288b5a8a5c338c1b0c85cf6547c38fa3c9654
Reviewed-on: https://gerrit.chromium.org/gerrit/66987
Tested-by: Chris Sosa <sosa@chromium.org>
Reviewed-by: Don Garrett <dgarrett@chromium.org>
Commit-Queue: Chris Sosa <sosa@chromium.org>
diff --git a/au_test_harness/cros_au_test_harness.py b/au_test_harness/cros_au_test_harness.py
index fc6a2f8..363737c 100755
--- a/au_test_harness/cros_au_test_harness.py
+++ b/au_test_harness/cros_au_test_harness.py
@@ -205,10 +205,14 @@
 
   with sudo.SudoKeepAlive():
     au_worker.AUWorker.SetUpdateCache(update_cache)
-    my_server = dev_server_wrapper.DevServerWrapper(options.test_results_root)
-    my_server.start()
+    my_server = None
     try:
-      my_server.WaitUntilStarted()
+      # Only start a devserver if we'll need it.
+      if update_cache:
+        my_server = dev_server_wrapper.DevServerWrapper(
+            options.test_results_root)
+        my_server.Start()
+
       if options.type == 'vm':
         _RunTestsInParallel(options)
       else:
@@ -220,7 +224,8 @@
           cros_build_lib.Die('Test harness failed.')
 
     finally:
-      my_server.Stop()
+      if my_server:
+        my_server.Stop()
 
 
 if __name__ == '__main__':
diff --git a/devmode-test/devinstall_test.py b/devmode-test/devinstall_test.py
index c9a36c8..82d32f8 100755
--- a/devmode-test/devinstall_test.py
+++ b/devmode-test/devinstall_test.py
@@ -175,9 +175,8 @@
 
     if not self.binhost:
       logging.info('Starting the devserver.')
-      self.devserver = dev_server_wrapper.DevServerWrapper(self.tmpdir)
-      self.devserver.start()
-      self.devserver.WaitUntilStarted()
+      self.devserver = dev_server_wrapper.DevServerWrapper()
+      self.devserver.Start()
       self.binhost = dev_server_wrapper.DevServerWrapper.GetDevServerURL(
           sub_dir='static/pkgroot/%s/packages' % self.board)
 
diff --git a/lib/constants.py b/lib/constants.py
index be4bf21..f684452 100644
--- a/lib/constants.py
+++ b/lib/constants.py
@@ -8,6 +8,8 @@
 
 CROS_PLATFORM_ROOT = os.path.join(os.path.dirname(_TEST_LIB_PATH), '..', '..')
 
+DEFAULT_CHROOT_DIR = 'chroot'
+
 SOURCE_ROOT = os.path.realpath(os.path.join(
     os.path.dirname(_TEST_LIB_PATH), '..', '..', '..', '..'))
 
diff --git a/lib/dev_server_wrapper.py b/lib/dev_server_wrapper.py
index 8f0f270..5242e72 100644
--- a/lib/dev_server_wrapper.py
+++ b/lib/dev_server_wrapper.py
@@ -5,11 +5,14 @@
 """Module containing methods and classes to interact with a devserver instance.
 """
 
-import os
+import logging
 import multiprocessing
+import os
 import re
 import sys
+import tempfile
 import time
+import urllib2
 
 import constants
 from chromite.lib import cros_build_lib
@@ -17,6 +20,8 @@
 # Wait up to 15 minutes for the dev server to start. It can take a while to
 # start when generating payloads in parallel.
 DEV_SERVER_TIMEOUT = 900
+CHECK_HEALTH_URL = 'http://127.0.0.1:8080/check_health'
+KILL_TIMEOUT = 10
 
 
 def GetIPAddress(device='eth0'):
@@ -52,54 +57,113 @@
 class DevServerWrapper(multiprocessing.Process):
   """A Simple wrapper around a dev server instance."""
 
-  def __init__(self, test_root):
-    self.proc = None
-    self.test_root = test_root
-    self._log_filename = os.path.join(test_root, 'dev_server.log')
-    multiprocessing.Process.__init__(self)
+  def __init__(self, test_root=None):
+    super(DevServerWrapper, self).__init__()
+    self.test_root = test_root or DevServerWrapper.MkChrootTemp(is_dir=True)
+    self._log_filename = os.path.join(self.test_root, 'dev_server.log')
+    self._pid_file = DevServerWrapper.MkChrootTemp(is_dir=False)
+    self._pid = None
+
+  # Chroot helper methods. Since we call a script into the chroot with paths
+  # created outside the chroot, translation back and forth is needed. These
+  # methods all assume the default chroot dir.
+
+  @staticmethod
+  def MkChrootTemp(is_dir):
+    """Returns a temp(dir if is_dir, file otherwise) in the chroot."""
+    chroot_tmp = os.path.join(constants.SOURCE_ROOT, 'chroot', 'tmp')
+    if is_dir:
+      return tempfile.mkdtemp(prefix='devserver_wrapper', dir=chroot_tmp)
+    else:
+      return tempfile.NamedTemporaryFile(prefix='devserver_wrapper',
+                                         dir=chroot_tmp, delete=False).name
+
+  @staticmethod
+  def ToChrootPath(path):
+    """Converts the path outside the chroot to one that can be used inside."""
+    path = os.path.abspath(path)
+    if '/chroot/' not in path:
+      raise ValueError('Path %s is not a path that points inside the chroot' %
+                       path)
+
+    return '/' + path.partition('/chroot/')[2]
+
+  def _WaitUntilStarted(self):
+    """Wait until the devserver has started."""
+    current_time = time.time()
+    deadline = current_time + DEV_SERVER_TIMEOUT
+    while current_time <= deadline:
+      try:
+        if not self.is_alive():
+          raise DevServerException('Devserver crashed while starting')
+
+        urllib2.urlopen(CHECK_HEALTH_URL, timeout=0.05)
+        return
+      except IOError:
+        # urlopen errors will throw a subclass of IOError if we can't connect.
+        pass
+      finally:
+        # Let's not churn needlessly in this loop as the devserver starts up.
+        time.sleep(1)
+
+      current_time = time.time()
+    else:
+      self.terminate()
+      raise DevServerException('Devserver did not start')
 
   def run(self):
-    # Kill previous running instance of devserver if it exists.
-    self.Stop()
-    cmd = ['start_devserver']
-    cros_build_lib.SudoRunCommand(cmd, enter_chroot=True, print_cmd=False,
-                                  log_stdout_to_file=self._log_filename,
-                                  combine_stdout_stderr=True,
-                                  cwd=constants.SOURCE_ROOT)
+    """Kicks off devserver in a separate process and waits for it to finish."""
+    cmd = ['start_devserver',
+           '--pidfile', DevServerWrapper.ToChrootPath(self._pid_file),
+           '--logfile', DevServerWrapper.ToChrootPath(self._log_filename)]
+    return_obj = cros_build_lib.SudoRunCommand(
+        cmd, enter_chroot=True, debug_level=logging.DEBUG,
+        cwd=constants.SOURCE_ROOT, error_code_ok=True,
+        redirect_stdout=True, combine_stdout_stderr=True)
+    if return_obj.returncode != 0:
+      logging.error('Devserver terminated unexpectedly!')
+      logging.error(return_obj.output)
+
+  def Start(self):
+    """Starts a background devserver and waits for it to start.
+
+    Starts a background devserver and waits for it to start. Will only return
+    once devserver has started and running pid has been read.
+    """
+    self.start()
+    self._WaitUntilStarted()
+    # Pid file was passed into the chroot.
+    with open(self._pid_file, 'r') as f:
+      self._pid = f.read()
 
   def Stop(self):
-    """Kills the devserver instance if it exists."""
-    cros_build_lib.SudoRunCommand(['pkill', '-f', 'devserver.py'],
-                                  error_code_ok=True, print_cmd=False)
+    """Kills the devserver instance with SIGTERM and SIGKILL if SIGTERM fails"""
+    if not self._pid:
+      logging.error('No devserver running.')
+      return
+
+    logging.debug('Stopping devserver instance with pid %s', self._pid)
+    if self.is_alive():
+      cros_build_lib.SudoRunCommand(['kill', self._pid],
+                                    debug_level=logging.DEBUG)
+    else:
+      logging.error('Devserver not running!')
+      return
+
+    self.join(KILL_TIMEOUT)
+    if self.is_alive():
+      logging.warning('Devserver is unstoppable. Killing with SIGKILL')
+      cros_build_lib.SudoRunCommand(['kill', '-9', self._pid],
+                                    debug_level=logging.DEBUG)
 
   def PrintLog(self):
-    """Print devserver output."""
+    """Print devserver output to stdout."""
     print '--- Start output from %s ---' % self._log_filename
     # Open in update mode in case the child process hasn't opened the file yet.
     with open(self._log_filename) as log:
       sys.stdout.writelines(log)
-    print '--- End output from %s ---' % self._log_filename
 
-  def WaitUntilStarted(self):
-    """Wait until the devserver has started."""
-    # Open in update mode in case the child process hasn't opened the file yet.
-    pos = 0
-    with open(self._log_filename, 'w+') as log:
-      for _ in range(DEV_SERVER_TIMEOUT * 2):
-        log.seek(pos)
-        for line in log:
-          # When the dev server has started, it will print a line stating
-          # 'Bus STARTED'. Wait for that line to appear.
-          if 'Bus STARTED' in line:
-            return
-          # If we've read a complete line, and it doesn't contain the magic
-          # phrase, move on to the next line.
-          if line.endswith('\n'):
-            pos = log.tell()
-        # Looks like it hasn't started yet. Keep waiting...
-        time.sleep(0.5)
-    self.PrintLog()
-    raise DevServerException('Timeout waiting for the devserver to startup.')
+    print '--- End output from %s ---' % self._log_filename
 
   @classmethod
   def GetDevServerURL(cls, port=None, sub_dir=None):
diff --git a/lib/dev_server_wrapper_stress_test.py b/lib/dev_server_wrapper_stress_test.py
index 0b75c3b..b10b907 100755
--- a/lib/dev_server_wrapper_stress_test.py
+++ b/lib/dev_server_wrapper_stress_test.py
@@ -6,27 +6,33 @@
 
 """Stress test for dev_server_wrapper.
 
-Test script runs forever stressing the ability to start and stop the
+Test script runs a long time stressing the ability to start and stop the
 dev_server_wrapper. Even very rare hangs will cause significant build flake.
 """
 
+import logging
 import sys
 
 import constants
-sys.path.append(constants.CROSUTILS_LIB_DIR)
 sys.path.append(constants.CROS_PLATFORM_ROOT)
 sys.path.append(constants.SOURCE_ROOT)
 
 from crostestutils.lib import dev_server_wrapper
 
-i = 0
-while True:
-  i += 1
-  print 'Iteration {}'.format(i)
-  wrapper = dev_server_wrapper.DevServerWrapper('/tmp')
-  print 'Starting'
-  wrapper.start()
-  print 'Waiting for Started'
-  wrapper.WaitUntilStarted()
-  print 'Stopping'
-  wrapper.Stop()
+
+_ITERATIONS = 10000
+
+
+def main():
+  logging.getLogger().setLevel(logging.DEBUG)
+  for i in range(_ITERATIONS):
+    print 'Iteration {}'.format(i)
+    wrapper = dev_server_wrapper.DevServerWrapper()
+    print 'Starting'
+    wrapper.Start()
+    print 'Stopping'
+    wrapper.Stop()
+
+
+if __name__ == '__main__':
+  main()