devserver: add ongoing au process number check in check_health.

This CL mainly does:
1. Add a check of the number of currently running background AU processes
to the check_health devserver call.
2. Make kill_au_proc also kill the AU process whose pid is passed in the
request, in addition to the processes found via the host's track logs.

BUG=chromium:696606
TEST=Run local devserver and call check_health & kill_au_proc.

Change-Id: I4fe44407b85659bd3aab309ac8efe11b0b457f68
Reviewed-on: https://chromium-review.googlesource.com/447821
Reviewed-by: Aviv Keshet <akeshet@chromium.org>
Commit-Queue: Xixuan Wu <xixuan@chromium.org>
Tested-by: Xixuan Wu <xixuan@chromium.org>
diff --git a/cros_update_progress.py b/cros_update_progress.py
index f7c6b8e..72b872a 100644
--- a/cros_update_progress.py
+++ b/cros_update_progress.py
@@ -20,21 +20,36 @@
 
 from __future__ import print_function
 
+import datetime
 import glob
 import logging
 import os
+import re
+
+import log_util
+
+# Module-local log function.
+def _Log(message, *args):
+  return log_util.LogWithTag('CROS_UPDATE_PROGRESS', message, *args)
 
 # only import setup_chromite before chromite import.
 import setup_chromite # pylint: disable=unused-import
 try:
   from chromite.lib import osutils
 except ImportError as e:
-  logging.debug('chromite cannot be imported: %r', e)
+  _Log('chromite cannot be imported: %r', e)
   osutils = None
 
+
 # Path for status tracking log.
 _TRACK_LOG_FILE_PATH = '/tmp/auto-update/tracking_log/%s_%s.log'
 
+# Pattern for status tracking log filename.
+_TRACK_LOG_FILE_NAME_PATTERN = r'([^_]+)_([^_]+).log'
+
+# The gap hour used in checking AU processes' count.
+AU_PROCESS_HOUR_GAP = 3
+
 # Path for executing log.
 _EXECUTE_LOG_FILE_PATH = '/tmp/auto-update/executing_log/%s_%s.log'
 
@@ -90,11 +105,73 @@
   return _TRACK_LOG_FILE_PATH % (host_name, pid)
 
 
+def GetAllTrackStatusFileByTime():
+  """Return all track status files existing in TRACK_LOG_FILE_PATH.
+
+  Returns:
+    A track status file list ordered by created time reversely.
+  """
+  return sorted(glob.glob(_TRACK_LOG_FILE_PATH % ('*', '*')),
+                key=os.path.getctime, reverse=True)
+
+
+def ParsePidFromTrackLogFileName(track_log_filename):
+  """Parse pid from a given track log file's name.
+
+  The track log file's name for auto-update is fixed:
+      hostname_pid.log
+
+  This func is used to parse pid from a given track log file.
+
+  Args:
+    track_log_filename: the filename of the track log to be parsed.
+
+  Returns:
+    the parsed pid (int).
+  """
+  match = re.match(_TRACK_LOG_FILE_NAME_PATTERN, track_log_filename)
+  try:
+    return int(match.groups()[1])
+  except (AttributeError, IndexError, ValueError) as e:
+    _Log('Cannot parse pid from track log file %s: %s', track_log_filename, e)
+    return None
+
+
 def GetAllTrackStatusFileByHostName(host_name):
   """Return a list of existing track status files generated for a host."""
   return glob.glob(_TRACK_LOG_FILE_PATH % (host_name, '*'))
 
 
+def GetAllRunningAUProcess():
+  """Get all the ongoing AU processes' pids from tracking logs.
+
+  This func only checks the tracking logs generated in latest several hours,
+  which is for avoiding the case that 'there's a running process whose id is
+  as the same as a previous AU process'.
+
+  Returns:
+    A list of background AU processes' pids.
+  """
+  pids = []
+  now = datetime.datetime.now()
+  track_log_list = GetAllTrackStatusFileByTime()
+  # Only check log file created in 3 hours.
+  for track_log in track_log_list:
+    try:
+      created_time = datetime.datetime.fromtimestamp(
+          os.path.getctime(track_log))
+      if now - created_time >= datetime.timedelta(hours=AU_PROCESS_HOUR_GAP):
+        break
+
+      pid = ParsePidFromTrackLogFileName(os.path.basename(track_log))
+      if pid and IsProcessAlive(pid):
+        pids.append(pid)
+    except (ValueError, os.error) as e:
+      _Log('Error happened in getting pid from %s: %s', track_log, e)
+
+  return pids
+
+
 def GetAUTempDirectory(host_name, pid):
   """Return the temp dir for storing codes and logs during auto-update."""
   au_tempdir = _CROS_UPDATE_TEMP_PATH % (host_name, pid)
diff --git a/devserver.py b/devserver.py
index 8ab9d68..5f8b055 100755
--- a/devserver.py
+++ b/devserver.py
@@ -533,6 +533,7 @@
   else:
     return False
 
+
 def _parse_string_arg(kwargs, key):
   """Parse string arg from kwargs.
 
@@ -548,6 +549,7 @@
   else:
     return None
 
+
 def _build_uri_from_build_name(build_name):
   """Get build url from a given build name.
 
@@ -562,6 +564,26 @@
       cros_update.STABLE_BUILD_CHANNEL, build_name.split('/')[0],
       build_name.split('/')[1])
 
+
+def _clear_process(host_name, pid):
+  """Clear AU process for given hostname and pid.
+
+  This clear includes:
+    1. kill process if it's alive.
+    2. delete the track status file of this process.
+    3. delete the executing log file of this process.
+
+  Args:
+    host_name: the host to execute auto-update.
+    pid: the background auto-update process id.
+  """
+  if cros_update_progress.IsProcessAlive(pid):
+    os.killpg(int(pid), signal.SIGKILL)
+
+  cros_update_progress.DelTrackStatusFile(host_name, pid)
+  cros_update_progress.DelExecuteLogFile(host_name, pid)
+
+
 class ApiRoot(object):
   """RESTful API for Dev Server information."""
   exposed = True
@@ -1021,6 +1043,8 @@
     if 'host_name' not in kwargs:
       raise common_util.DevServerHTTPError((KEY_ERROR_MSG % 'host_name'))
 
+    cur_pid = kwargs.get('pid')
+
     host_name = kwargs['host_name']
     track_log_list = cros_update_progress.GetAllTrackStatusFileByHostName(
         host_name)
@@ -1029,11 +1053,10 @@
       # Use splitext to remove file extension, then parse pid from the
       # filename.
       pid = os.path.splitext(os.path.basename(log))[0][len(host_name)+1:]
-      if cros_update_progress.IsProcessAlive(pid):
-        os.killpg(int(pid), signal.SIGKILL)
+      _clear_process(host_name, pid)
 
-      cros_update_progress.DelTrackStatusFile(host_name, pid)
-      cros_update_progress.DelExecuteLogFile(host_name, pid)
+    if cur_pid:
+      _clear_process(host_name, cur_pid)
 
     return 'True'
 
@@ -1606,13 +1629,16 @@
     apache_client_count = self._get_process_count('apache')
     telemetry_test_count = self._get_process_count('python.*telemetry')
     gsutil_count = self._get_process_count('gsutil')
+    au_process_count = len(cros_update_progress.GetAllRunningAUProcess())
 
     health_data = {
         'free_disk': free_disk,
         'staging_thread_count': DevServerRoot._staging_thread_count,
         'apache_client_count': apache_client_count,
         'telemetry_test_count': telemetry_test_count,
-        'gsutil_count': gsutil_count}
+        'gsutil_count': gsutil_count,
+        'au_process_count': au_process_count,
+    }
     health_data.update(self._get_io_stats() or {})
 
     return json.dumps(health_data)