devserver: add ongoing au process number check in check_health.
This CL mainly does:
1. Add a check of the number of currently running background AU processes
to the check_health devserver call.
2. Force kill_au_proc to also kill the AU process whose pid is passed, when
a pid is given.
BUG=chromium:696606
TEST=Run local devserver and call check_health & kill_au_proc.
Change-Id: I4fe44407b85659bd3aab309ac8efe11b0b457f68
Reviewed-on: https://chromium-review.googlesource.com/447821
Reviewed-by: Aviv Keshet <akeshet@chromium.org>
Commit-Queue: Xixuan Wu <xixuan@chromium.org>
Tested-by: Xixuan Wu <xixuan@chromium.org>
diff --git a/cros_update_progress.py b/cros_update_progress.py
index f7c6b8e..72b872a 100644
--- a/cros_update_progress.py
+++ b/cros_update_progress.py
@@ -20,21 +20,36 @@
from __future__ import print_function
+import datetime
import glob
import logging
import os
+import re
+
+import log_util
+
+# Module-local log function.
+def _Log(message, *args):
+ return log_util.LogWithTag('CROS_UPDATE_PROGRESS', message, *args)
# only import setup_chromite before chromite import.
import setup_chromite # pylint: disable=unused-import
try:
from chromite.lib import osutils
except ImportError as e:
- logging.debug('chromite cannot be imported: %r', e)
+ _Log('chromite cannot be imported: %r', e)
osutils = None
+
# Path for status tracking log.
_TRACK_LOG_FILE_PATH = '/tmp/auto-update/tracking_log/%s_%s.log'
+# Pattern for status tracking log filename.
+_TRACK_LOG_FILE_NAME_PATTERN = r'([^_]+)_([^_]+).log'
+
+# Max age, in hours, of a tracking log counted when checking AU processes.
+AU_PROCESS_HOUR_GAP = 3
+
# Path for executing log.
_EXECUTE_LOG_FILE_PATH = '/tmp/auto-update/executing_log/%s_%s.log'
@@ -90,11 +105,73 @@
return _TRACK_LOG_FILE_PATH % (host_name, pid)
+def GetAllTrackStatusFileByTime():
+ """Return all track status files existing in TRACK_LOG_FILE_PATH.
+
+ Returns:
+ A list of track status files sorted by os.path.getctime, newest first.
+ """
+ return sorted(glob.glob(_TRACK_LOG_FILE_PATH % ('*', '*')),
+ key=os.path.getctime, reverse=True)
+
+
+def ParsePidFromTrackLogFileName(track_log_filename):
+ """Parse pid from a given track log file's name.
+
+ The track log file's name for auto-update is fixed:
+ hostname_pid.log
+
+ This func is used to parse pid from a given track log file.
+
+ Args:
+ track_log_filename: the filename of the track log to be parsed.
+
+ Returns:
+ the parsed pid (int).
+ """
+ match = re.match(_TRACK_LOG_FILE_NAME_PATTERN, track_log_filename)
+ try:
+ return int(match.groups()[1])
+ except (AttributeError, IndexError, ValueError) as e:
+ _Log('Cannot parse pid from track log file %s: %s', track_log_filename, e)
+ return None
+
+
def GetAllTrackStatusFileByHostName(host_name):
"""Return a list of existing track status files generated for a host."""
return glob.glob(_TRACK_LOG_FILE_PATH % (host_name, '*'))
+def GetAllRunningAUProcess():
+ """Get all the ongoing AU processes' pids from tracking logs.
+
+ This func only checks tracking logs generated in the last few hours, to avoid
+ the case where a running process happens to have the same pid as a previous
+ AU process.
+
+ Returns:
+ A list of background AU processes' pids.
+ """
+ pids = []
+ now = datetime.datetime.now()
+ track_log_list = GetAllTrackStatusFileByTime()
+ # Only check log files created within the last AU_PROCESS_HOUR_GAP hours.
+ for track_log in track_log_list:
+ try:
+ created_time = datetime.datetime.fromtimestamp(
+ os.path.getctime(track_log))
+ if now - created_time >= datetime.timedelta(hours=AU_PROCESS_HOUR_GAP):
+ break
+
+ pid = ParsePidFromTrackLogFileName(os.path.basename(track_log))
+ if pid and IsProcessAlive(pid):
+ pids.append(pid)
+ except (ValueError, os.error) as e:
+ _Log('Error happened in getting pid from %s: %s', track_log, e)
+
+ return pids
+
+
def GetAUTempDirectory(host_name, pid):
"""Return the temp dir for storing codes and logs during auto-update."""
au_tempdir = _CROS_UPDATE_TEMP_PATH % (host_name, pid)
diff --git a/devserver.py b/devserver.py
index 8ab9d68..5f8b055 100755
--- a/devserver.py
+++ b/devserver.py
@@ -533,6 +533,7 @@
else:
return False
+
def _parse_string_arg(kwargs, key):
"""Parse string arg from kwargs.
@@ -548,6 +549,7 @@
else:
return None
+
def _build_uri_from_build_name(build_name):
"""Get build url from a given build name.
@@ -562,6 +564,26 @@
cros_update.STABLE_BUILD_CHANNEL, build_name.split('/')[0],
build_name.split('/')[1])
+
+def _clear_process(host_name, pid):
+ """Clear AU process for given hostname and pid.
+
+ The cleanup includes:
+ 1. SIGKILL the process group if the process is alive.
+ 2. Delete the track status file of this process.
+ 3. Delete the executing log file of this process.
+
+ Args:
+ host_name: the host to execute auto-update.
+ pid: the background auto-update process id.
+ """
+ if cros_update_progress.IsProcessAlive(pid):
+ os.killpg(int(pid), signal.SIGKILL)
+
+ cros_update_progress.DelTrackStatusFile(host_name, pid)
+ cros_update_progress.DelExecuteLogFile(host_name, pid)
+
+
class ApiRoot(object):
"""RESTful API for Dev Server information."""
exposed = True
@@ -1021,6 +1043,8 @@
if 'host_name' not in kwargs:
raise common_util.DevServerHTTPError((KEY_ERROR_MSG % 'host_name'))
+ cur_pid = kwargs.get('pid')
+
host_name = kwargs['host_name']
track_log_list = cros_update_progress.GetAllTrackStatusFileByHostName(
host_name)
@@ -1029,11 +1053,10 @@
# Use splitext to remove file extension, then parse pid from the
# filename.
pid = os.path.splitext(os.path.basename(log))[0][len(host_name)+1:]
- if cros_update_progress.IsProcessAlive(pid):
- os.killpg(int(pid), signal.SIGKILL)
+ _clear_process(host_name, pid)
- cros_update_progress.DelTrackStatusFile(host_name, pid)
- cros_update_progress.DelExecuteLogFile(host_name, pid)
+ if cur_pid:
+ _clear_process(host_name, cur_pid)
return 'True'
@@ -1606,13 +1629,16 @@
apache_client_count = self._get_process_count('apache')
telemetry_test_count = self._get_process_count('python.*telemetry')
gsutil_count = self._get_process_count('gsutil')
+ au_process_count = len(cros_update_progress.GetAllRunningAUProcess())
health_data = {
'free_disk': free_disk,
'staging_thread_count': DevServerRoot._staging_thread_count,
'apache_client_count': apache_client_count,
'telemetry_test_count': telemetry_test_count,
- 'gsutil_count': gsutil_count}
+ 'gsutil_count': gsutil_count,
+ 'au_process_count': au_process_count,
+ }
health_data.update(self._get_io_stats() or {})
return json.dumps(health_data)