devserver: refactor check_health to a separate app
This CL moves all /check_health related code to a separate module and
wraps it as a standalone cherrypy application.
BUG=chromium:993621
TEST=1. Ran devserver_integrated_test.py in chroot
2. Ran below command locally to verify:
$ curl http://127.0.0.1:8080/check_health
Change-Id: Ic9c483202a2cef54d6953924e90001c9937a4fa8
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/1753193
Tested-by: Congbin Guo <guocb@chromium.org>
Auto-Submit: Congbin Guo <guocb@chromium.org>
Reviewed-by: C Shapiro <shapiroc@chromium.org>
Commit-Queue: Congbin Guo <guocb@chromium.org>
diff --git a/devserver.py b/devserver.py
index 130ce10..9d77be0 100755
--- a/devserver.py
+++ b/devserver.py
@@ -44,28 +44,28 @@
import sys
import tempfile
import threading
-import time
import types
from logging import handlers
import cherrypy
# pylint: disable=no-name-in-module
from cherrypy import _cplogging as cplogging
-from cherrypy.process import plugins # pylint: disable=import-error
+from cherrypy.process import plugins # pylint: disable=import-error
# pylint: enable=no-name-in-module
# This must happen before any local modules get a chance to import
# anything from chromite. Otherwise, really bad things will happen, and
# you will _not_ understand why.
-import setup_chromite # pylint: disable=unused-import
+import setup_chromite # pylint: disable=unused-import
-import autoupdate
import artifact_info
+import autoupdate
import build_artifact
import cherrypy_ext
import common_util
import devserver_constants
import downloader
+import health_checker
import log_util
import xbuddy
@@ -73,23 +73,6 @@
def _Log(message, *args):
return log_util.LogWithTag('DEVSERVER', message, *args)
-try:
- import psutil
-except ImportError:
- # Ignore psutil import failure. This is for backwards compatibility, so
- # "cros flash" can still update duts with build without psutil installed.
- # The reason is that, during cros flash, local devserver code is copied over
- # to DUT, and devserver will be running inside DUT to stage the build.
- _Log('Python module psutil is not installed, devserver load data will not be '
- 'collected')
- psutil = None
-except OSError as e:
- # Ignore error like following. psutil may not work properly in builder. Ignore
- # the error as load information of devserver is not used in builder.
- # OSError: [Errno 2] No such file or directory: '/dev/pts/0'
- _Log('psutil is failed to be imported, error: %s. devserver load data will '
- 'not be collected.', e)
- psutil = None
# Use try-except to skip unneccesary import for simple use case, eg. running
# devserver on host.
@@ -132,11 +115,8 @@
# For more, see the documentation in standard python library for
# logging.handlers.TimedRotatingFileHandler
_LOG_ROTATION_TIME = 'H'
-_LOG_ROTATION_INTERVAL = 12 # hours
-_LOG_ROTATION_BACKUP = 28 # backup counts
-
-# Number of seconds between the collection of disk and network IO counters.
-STATS_INTERVAL = 10.0
+_LOG_ROTATION_INTERVAL = 12 # hours
+_LOG_ROTATION_BACKUP = 28 # backup counts
# Auto-update parameters
@@ -151,32 +131,6 @@
"""Exception class used by this module."""
-def require_psutil():
- """Decorator for functions require psutil to run."""
- def deco_require_psutil(func):
- """Wrapper of the decorator function.
-
- Args:
- func: function to be called.
- """
- def func_require_psutil(*args, **kwargs):
- """Decorator for functions require psutil to run.
-
- If psutil is not installed, skip calling the function.
-
- Args:
- *args: arguments for function to be called.
- **kwargs: keyword arguments for function to be called.
- """
- if psutil:
- return func(*args, **kwargs)
- else:
- _Log('Python module psutil is not installed. Function call %s is '
- 'skipped.' % func)
- return func_require_psutil
- return deco_require_psutil
-
-
def _canonicalize_archive_url(archive_url):
"""Canonicalizes archive_url strings.
@@ -646,7 +600,6 @@
raise common_util.DevServerHTTPError(httplib.BAD_REQUEST,
'No label provided.')
-
@cherrypy.expose
def fileinfo(self, *args):
"""Returns information about a given staged file.
@@ -700,58 +653,15 @@
# Lock used to lock increasing/decreasing count.
_staging_thread_count_lock = threading.Lock()
- @require_psutil()
- def _refresh_io_stats(self):
- """A call running in a thread to update IO stats periodically."""
- prev_disk_io_counters = psutil.disk_io_counters()
- prev_network_io_counters = psutil.net_io_counters()
- prev_read_time = time.time()
- while True:
- time.sleep(STATS_INTERVAL)
- now = time.time()
- interval = now - prev_read_time
- prev_read_time = now
- # Disk IO is for all disks.
- disk_io_counters = psutil.disk_io_counters()
- network_io_counters = psutil.net_io_counters()
-
- self.disk_read_bytes_per_sec = (
- disk_io_counters.read_bytes -
- prev_disk_io_counters.read_bytes)/interval
- self.disk_write_bytes_per_sec = (
- disk_io_counters.write_bytes -
- prev_disk_io_counters.write_bytes)/interval
- prev_disk_io_counters = disk_io_counters
-
- self.network_sent_bytes_per_sec = (
- network_io_counters.bytes_sent -
- prev_network_io_counters.bytes_sent)/interval
- self.network_recv_bytes_per_sec = (
- network_io_counters.bytes_recv -
- prev_network_io_counters.bytes_recv)/interval
- prev_network_io_counters = network_io_counters
-
- @require_psutil()
- def _start_io_stat_thread(self):
- """Start the thread to collect IO stats."""
- thread = threading.Thread(target=self._refresh_io_stats)
- thread.daemon = True
- thread.start()
-
def __init__(self, _xbuddy):
self._builder = None
self._telemetry_lock_dict = common_util.LockDict()
self._xbuddy = _xbuddy
- # Cache of disk IO stats, a thread refresh the stats every 10 seconds.
- # lock is not used for these variables as the only thread writes to these
- # variables is _refresh_io_stats.
- self.disk_read_bytes_per_sec = 0
- self.disk_write_bytes_per_sec = 0
- # Cache of network IO stats.
- self.network_sent_bytes_per_sec = 0
- self.network_recv_bytes_per_sec = 0
- self._start_io_stat_thread()
+ @property
+ def staging_thread_count(self):
+ """Get the staging thread count."""
+ return self._staging_thread_count
@cherrypy.expose
def build(self, board, pkg, **kwargs):
@@ -765,6 +675,11 @@
def is_staged(self, **kwargs):
"""Check if artifacts have been downloaded.
+ Examples:
+ To check if autotest and test_suites are staged:
+ http://devserver_url:<port>/is_staged?archive_url=gs://your_url/path&
+ artifacts=autotest,test_suites
+
Args:
async: True to return without waiting for download to complete.
artifacts: Comma separated list of named artifacts to download.
@@ -774,12 +689,8 @@
will be available as is in the corresponding static directory with no
custom post-processing.
- Returns: True of all artifacts are staged.
-
- Examples:
- To check if autotest and test_suites are staged:
- http://devserver_url:<port>/is_staged?archive_url=gs://your_url/path&
- artifacts=autotest,test_suites
+    Returns:
+      True if all artifacts are staged.
"""
dl, factory = _get_downloader_and_factory(kwargs)
response = str(dl.IsStaged(factory))
@@ -790,14 +701,14 @@
def list_image_dir(self, **kwargs):
"""Take an archive url and list the contents in its staged directory.
- Args:
- archive_url: Google Storage URL for the build.
-
Examples:
To list the contents of where this devserver should have staged
gs://image-archive/<board>-release/<build> call:
http://devserver_url:<port>/list_image_dir?archive_url=<gs://..>
+ Args:
+ archive_url: Google Storage URL for the build.
+
Returns:
A string with information about the contents of the image directory.
"""
@@ -824,22 +735,6 @@
These artifacts will then be available from the static/ sub-directory of
the devserver.
- Args:
- archive_url: Google Storage URL for the build.
- local_path: Local path for the build.
- delete_source: Only meaningful with local_path. bool to indicate if the
- source files should be deleted. This is especially useful when staging
- a file locally in resource constrained environments as it allows us to
- move the relevant files locally instead of copying them.
- async: True to return without waiting for download to complete.
- artifacts: Comma separated list of named artifacts to download.
- These are defined in artifact_info and have their implementation
- in build_artifact.py.
- files: Comma separated list of files to stage. These
- will be available as is in the corresponding static directory with no
- custom post-processing.
- clean: True to remove any previously staged artifacts first.
-
Examples:
To download the autotest and test suites tarballs:
http://devserver_url:<port>/stage?archive_url=gs://your_url/path&
@@ -862,6 +757,22 @@
Will get staged to:
http://devserver_url:<port>/static/x86-mario-release/R26-3920.0.0
+
+ Args:
+ archive_url: Google Storage URL for the build.
+ local_path: Local path for the build.
+ delete_source: Only meaningful with local_path. bool to indicate if the
+ source files should be deleted. This is especially useful when staging
+ a file locally in resource constrained environments as it allows us to
+ move the relevant files locally instead of copying them.
+ async: True to return without waiting for download to complete.
+ artifacts: Comma separated list of named artifacts to download.
+ These are defined in artifact_info and have their implementation
+ in build_artifact.py.
+ files: Comma separated list of files to stage. These
+ will be available as is in the corresponding static directory with no
+ custom post-processing.
+ clean: True to remove any previously staged artifacts first.
"""
dl, factory = _get_downloader_and_factory(kwargs)
@@ -1115,7 +1026,7 @@
# The track log's full path is: path/host_name_pid.log
# Use splitext to remove file extension, then parse pid from the
# filename.
- pid = os.path.splitext(os.path.basename(log))[0][len(host_name)+1:]
+ pid = os.path.splitext(os.path.basename(log))[0][len(host_name) + 1:]
_clear_process(host_name, pid)
if cur_pid:
@@ -1639,83 +1550,6 @@
return updater.HandleUpdatePing(data, label)
- @require_psutil()
- def _get_io_stats(self):
- """Get the IO stats as a dictionary.
-
- Returns:
- A dictionary of IO stats collected by psutil.
- """
- return {'disk_read_bytes_per_second': self.disk_read_bytes_per_sec,
- 'disk_write_bytes_per_second': self.disk_write_bytes_per_sec,
- 'disk_total_bytes_per_second': (self.disk_read_bytes_per_sec +
- self.disk_write_bytes_per_sec),
- 'network_sent_bytes_per_second': self.network_sent_bytes_per_sec,
- 'network_recv_bytes_per_second': self.network_recv_bytes_per_sec,
- 'network_total_bytes_per_second': (self.network_sent_bytes_per_sec +
- self.network_recv_bytes_per_sec),
- 'cpu_percent': psutil.cpu_percent(),}
-
-
- def _get_process_count(self, process_cmd_pattern):
- """Get the count of processes that match the given command pattern.
-
- Args:
- process_cmd_pattern: The regex pattern of process command to match.
-
- Returns:
- The count of processes that match the given command pattern.
- """
- try:
- # Use Popen instead of check_output since the latter cannot run with old
- # python version (less than 2.7)
- proc = subprocess.Popen(
- ['pgrep', '-fc', process_cmd_pattern],
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- )
- cmd_output, cmd_error = proc.communicate()
- if cmd_error:
- _Log('Error happened when getting process count: %s' % cmd_error)
-
- return int(cmd_output)
- except subprocess.CalledProcessError:
- return 0
-
-
- @cherrypy.expose
- def check_health(self):
- """Collect the health status of devserver to see if it's ready for staging.
-
- Returns:
- A JSON dictionary containing all or some of the following fields:
- free_disk (int): free disk space in GB
- staging_thread_count (int): number of devserver threads currently staging
- an image
- apache_client_count (int): count of Apache processes.
- telemetry_test_count (int): count of telemetry tests.
- gsutil_count (int): count of gsutil processes.
- """
- # Get free disk space.
- stat = os.statvfs(updater.static_dir)
- free_disk = stat.f_bsize * stat.f_bavail / 1000000000
- apache_client_count = self._get_process_count('bin/apache2? -k start')
- telemetry_test_count = self._get_process_count('python.*telemetry')
- gsutil_count = self._get_process_count('gsutil')
- au_process_count = len(cros_update_progress.GetAllRunningAUProcess())
-
- health_data = {
- 'free_disk': free_disk,
- 'staging_thread_count': DevServerRoot._staging_thread_count,
- 'apache_client_count': apache_client_count,
- 'telemetry_test_count': telemetry_test_count,
- 'gsutil_count': gsutil_count,
- 'au_process_count': au_process_count,
- }
- health_data.update(self._get_io_stats() or {})
-
- return json.dumps(health_data)
-
def _CleanCache(cache_dir, wipe):
"""Wipes any excess cached items in the cache_dir.
@@ -1941,6 +1775,7 @@
return
dev_server = DevServerRoot(_xbuddy)
+ health_checker_app = health_checker.Root(dev_server, options.static_dir)
# Patch CherryPy to support binding to any available port (--port=0).
cherrypy_ext.ZeroPortPatcher.DoPatch(cherrypy)
@@ -1959,6 +1794,9 @@
except ValueError as e:
_Log('Failed to load the android build credential: %s. Error: %s.' %
(options.android_build_credential, e))
+
+ cherrypy.tree.mount(health_checker_app, '/check_health',
+ config=health_checker.get_config())
cherrypy.quickstart(dev_server, config=_GetConfig(options))
diff --git a/health_checker.py b/health_checker.py
new file mode 100644
index 0000000..8c274a3
--- /dev/null
+++ b/health_checker.py
@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""A cherrypy application to check devserver health status."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import os
+import subprocess
+import threading
+import time
+
+import cherrypy
+
+import cros_update_progress
+import log_util
+
+
+def _Log(message, *args):
+ """Module-local log function."""
+ return log_util.LogWithTag('HEALTHCHECKER', message, *args)
+
+
+try:
+ import psutil
+except ImportError:
+ # Ignore psutil import failure. This is for backwards compatibility, so
+ # "cros flash" can still update duts with build without psutil installed.
+ # The reason is that, during cros flash, local devserver code is copied over
+ # to DUT, and devserver will be running inside DUT to stage the build.
+ _Log('Python module psutil is not installed, devserver load data will not be '
+ 'collected')
+ psutil = None
+except OSError as e:
+  # Ignore errors like the following. psutil may not work properly in builders;
+  # ignore the error as devserver load information is not used in builders.
+ # OSError: [Errno 2] No such file or directory: '/dev/pts/0'
+ _Log('psutil is failed to be imported, error: %s. devserver load data will '
+ 'not be collected.', e)
+ psutil = None
+
+
+# Number of seconds between the collection of disk and network IO counters.
+STATS_INTERVAL = 10.0
+_1G = 1000000000
+
+
+def require_psutil():
+  """Decorator for functions that require psutil to run."""
+ def deco_require_psutil(func):
+ """Wrapper of the decorator function.
+
+ Args:
+ func: function to be called.
+ """
+ def func_require_psutil(*args, **kwargs):
+      """Decorator for functions that require psutil to run.
+
+ If psutil is not installed, skip calling the function.
+
+ Args:
+ *args: arguments for function to be called.
+ **kwargs: keyword arguments for function to be called.
+ """
+ if psutil:
+ return func(*args, **kwargs)
+ else:
+ _Log('Python module psutil is not installed. Function call %s is '
+ 'skipped.' % func)
+ return func_require_psutil
+ return deco_require_psutil
+
+
+def _get_process_count(process_cmd_pattern):
+ """Get the count of processes that match the given command pattern.
+
+ Args:
+ process_cmd_pattern: The regex pattern of process command to match.
+
+ Returns:
+ The count of processes that match the given command pattern.
+ """
+ try:
+ # Use Popen instead of check_output since the latter cannot run with old
+ # python version (less than 2.7)
+ proc = subprocess.Popen(
+ ['pgrep', '-fc', process_cmd_pattern],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+ cmd_output, cmd_error = proc.communicate()
+ if cmd_error:
+ _Log('Error happened when getting process count: %s' % cmd_error)
+
+ return int(cmd_output)
+ except subprocess.CalledProcessError:
+ return 0
+
+
+def get_config():
+ """Get cherrypy config for this application."""
+ return {
+ '/': {
+          # Disable the automatic trailing-slash redirect (i.e.
+          # /check_health -> /check_health/) so /check_health is served as-is.
+ 'tools.trailing_slash.on': False,
+ }
+ }
+
+
+class Root(object):
+ """Cherrypy Root class of the application."""
+ def __init__(self, devserver, static_dir):
+ self._static_dir = static_dir
+ self._devserver = devserver
+
+    # Cache of disk IO stats; a thread refreshes the stats every 10 seconds.
+    # A lock is not used for these variables as the only thread that writes to
+    # them is _refresh_io_stats.
+ self.disk_read_bytes_per_sec = 0
+ self.disk_write_bytes_per_sec = 0
+ # Cache of network IO stats.
+ self.network_sent_bytes_per_sec = 0
+ self.network_recv_bytes_per_sec = 0
+ self._start_io_stat_thread()
+
+ @require_psutil()
+ def _get_io_stats(self):
+ """Get the IO stats as a dictionary.
+
+ Returns:
+ A dictionary of IO stats collected by psutil.
+ """
+ return {'disk_read_bytes_per_second': self.disk_read_bytes_per_sec,
+ 'disk_write_bytes_per_second': self.disk_write_bytes_per_sec,
+ 'disk_total_bytes_per_second': (self.disk_read_bytes_per_sec +
+ self.disk_write_bytes_per_sec),
+ 'network_sent_bytes_per_second': self.network_sent_bytes_per_sec,
+ 'network_recv_bytes_per_second': self.network_recv_bytes_per_sec,
+ 'network_total_bytes_per_second': (self.network_sent_bytes_per_sec +
+ self.network_recv_bytes_per_sec),
+ 'cpu_percent': psutil.cpu_percent(), }
+
+ @require_psutil()
+ def _refresh_io_stats(self):
+ """A call running in a thread to update IO stats periodically."""
+ prev_disk_io_counters = psutil.disk_io_counters()
+ prev_network_io_counters = psutil.net_io_counters()
+ prev_read_time = time.time()
+ while True:
+ time.sleep(STATS_INTERVAL)
+ now = time.time()
+ interval = now - prev_read_time
+ prev_read_time = now
+ # Disk IO is for all disks.
+ disk_io_counters = psutil.disk_io_counters()
+ network_io_counters = psutil.net_io_counters()
+
+ self.disk_read_bytes_per_sec = (
+ disk_io_counters.read_bytes -
+ prev_disk_io_counters.read_bytes) / interval
+ self.disk_write_bytes_per_sec = (
+ disk_io_counters.write_bytes -
+ prev_disk_io_counters.write_bytes) / interval
+ prev_disk_io_counters = disk_io_counters
+
+ self.network_sent_bytes_per_sec = (
+ network_io_counters.bytes_sent -
+ prev_network_io_counters.bytes_sent) / interval
+ self.network_recv_bytes_per_sec = (
+ network_io_counters.bytes_recv -
+ prev_network_io_counters.bytes_recv) / interval
+ prev_network_io_counters = network_io_counters
+
+ @require_psutil()
+ def _start_io_stat_thread(self):
+ """Start the thread to collect IO stats."""
+ thread = threading.Thread(target=self._refresh_io_stats)
+ thread.daemon = True
+ thread.start()
+
+ @cherrypy.expose
+ def index(self):
+ """Collect the health status of devserver to see if it's ready for staging.
+
+ Returns:
+ A JSON dictionary containing all or some of the following fields:
+ free_disk (int): free disk space in GB
+ staging_thread_count (int): number of devserver threads currently staging
+ an image
+ apache_client_count (int): count of Apache processes.
+ telemetry_test_count (int): count of telemetry tests.
+ gsutil_count (int): count of gsutil processes.
+ """
+ # Get free disk space.
+ stat = os.statvfs(self._static_dir)
+ free_disk = stat.f_bsize * stat.f_bavail / _1G
+ apache_client_count = _get_process_count('bin/apache2? -k start')
+ telemetry_test_count = _get_process_count('python.*telemetry')
+ gsutil_count = _get_process_count('gsutil')
+ au_process_count = len(cros_update_progress.GetAllRunningAUProcess())
+
+ health_data = {
+ 'free_disk': free_disk,
+ 'staging_thread_count': self._devserver.staging_thread_count,
+ 'apache_client_count': apache_client_count,
+ 'telemetry_test_count': telemetry_test_count,
+ 'gsutil_count': gsutil_count,
+ 'au_process_count': au_process_count,
+ }
+ health_data.update(self._get_io_stats() or {})
+
+ return json.dumps(health_data)