[Autotest] Copy only new data in /var/log folder after each test.
Copying all content in the /var/log folder causes overhead in both test time
and test result storage. The /var/log folder's size can be over 40MB after a
test is finished. This change implements diff logic so that only the data
added to the /var/log folder between the start and end of each test is copied.
BUG=chromium:217983
TEST=manually run autoserv with sleep test and verify new data in /var/log
folder is copied over to [test]/sysinfo/var/log_diff. Also use trybot build
trybot-lumpy-paladin/R28-3915.0.0-b741 to confirm autoupdate job's /var/log
is collected as expected.
DEPLOY=manual; before pushing to prod, this CL must be backported to all
branches being tested.
Change-Id: Ic78c5aa6c35eeab63bb3a0d8cb7095b7f55f217b
Previous-Reviewed-on: https://gerrit.chromium.org/gerrit/47138
(cherry picked from commit 5fdd9df17bace2b92a2e7a1ac131370c72c61278)
Reviewed-on: https://gerrit.chromium.org/gerrit/47892
Reviewed-by: Ben Henry <benhenry@chromium.org>
Tested-by: Ben Henry <benhenry@chromium.org>
diff --git a/client/bin/site_sysinfo.py b/client/bin/site_sysinfo.py
index 6266aaa..77f8d3d 100755
--- a/client/bin/site_sysinfo.py
+++ b/client/bin/site_sysinfo.py
@@ -4,7 +4,8 @@
import os
-from autotest_lib.client.common_lib import utils, global_config
+from autotest_lib.client.common_lib import log
+from autotest_lib.client.common_lib import error, utils, global_config
from autotest_lib.client.bin import base_sysinfo
from autotest_lib.client.cros import constants
@@ -69,6 +70,152 @@
log_dir, parent_dir))
+class file_stat(object):
+ """Store the file size and inode, used for retrieving new data in file."""
+ def __init__(self, file_path):
+ """Collect the size and inode information of a file.
+
+ @param file_path: full path to the file.
+
+ """
+ stat = os.stat(file_path)
+ # Starting size of the file; skip that many bytes when doing the diff.
+ self.st_size = stat.st_size
+ # inode of the file. If inode is changed, treat this as a new file and
+ # copy the whole file.
+ self.st_ino = stat.st_ino
+
+
+class diffable_logdir(logdir):
+ """Represents a log directory from which only new content is copied.
+
+ An instance of this class should be added in both
+ before_iteration_loggables and after_iteration_loggables. This is to
+ guarantee the file status information is collected when run method is
+ called in before_iteration_loggables, and diff is executed when run
+ method is called in after_iteration_loggables.
+
+ """
+ def __init__(self, directory, additional_exclude=None,
+ keep_file_hierarchy=True, append_diff_in_name=True):
+ """
+ Constructor of a diffable_logdir instance.
+
+ @param directory: directory to be diffed after an iteration finished.
+ @param additional_exclude: additional dir to be excluded, not used.
+ @param keep_file_hierarchy: True if the full path must be preserved, e.g.,
+ sysinfo/var/log/sysstat, vs. sysinfo/sysstat if it's False.
+ @param append_diff_in_name: True if you want to append '_diff' to the
+ folder name to indicate it's a diff, e.g., var/log_diff. Option
+ keep_file_hierarchy must be True for this to take effect.
+
+ """
+ super(diffable_logdir, self).__init__(directory, additional_exclude)
+ self.additional_exclude = additional_exclude
+ self.keep_file_hierarchy = keep_file_hierarchy
+ self.append_diff_in_name = append_diff_in_name
+ # Init dictionary to store all file status for files in the directory.
+ self._log_stats = {}
+
+
+ def _get_init_status_of_src_dir(self, src_dir):
+ """Get initial status of files in src_dir folder.
+
+ @param src_dir: directory to be diff-ed.
+
+ """
+ # Dictionary used to store the initial status of files in src_dir.
+ for file_path in self._get_all_files(src_dir):
+ self._log_stats[file_path] = file_stat(file_path)
+ self.file_stats_collected = True
+
+
+ def _get_all_files(self, path):
+ """Iterate through files in given path including subdirectories.
+
+ @param path: root directory.
+ @return: an iterator that iterates through all files in given path
+ including subdirectories.
+
+ """
+ if not os.path.exists(path):
+ yield []
+ for root, dirs, files in os.walk(path):
+ for f in files:
+ if f.startswith('autoserv'):
+ continue
+ yield os.path.join(root, f)
+
+
+ def _copy_new_data_in_file(self, file_path, src_dir, dest_dir):
+ """Copy all new data in a file to target directory.
+
+ @param file_path: full path to the file to be copied.
+ @param src_dir: source directory to do the diff.
+ @param dest_dir: target directory to store new data of src_dir.
+
+ """
+ bytes_to_skip = 0
+ if self._log_stats.has_key(file_path):
+ prev_stat = self._log_stats[file_path]
+ new_stat = os.stat(file_path)
+ if new_stat.st_ino == prev_stat.st_ino:
+ bytes_to_skip = prev_stat.st_size
+ if new_stat.st_size == bytes_to_skip:
+ return
+ elif new_stat.st_size < prev_stat.st_size:
+ # File is modified to a smaller size, copy whole file.
+ bytes_to_skip = 0
+ try:
+ with open(file_path, 'r') as in_log:
+ if bytes_to_skip > 0:
+ in_log.seek(bytes_to_skip)
+ # Skip src_dir in path, e.g., src_dir/[sub_dir]/file_name.
+ target_path = os.path.join(dest_dir,
+ os.path.relpath(file_path, src_dir))
+ target_dir = os.path.dirname(target_path)
+ if not os.path.exists(target_dir):
+ os.makedirs(target_dir)
+ with open(target_path, "w") as out_log:
+ out_log.write(in_log.read())
+ except IOError as e:
+ logging.error('Diff %s failed with error: %s', file_path, e)
+
+
+ def _log_diff(self, src_dir, dest_dir):
+ """Log all of the new data in src_dir to dest_dir.
+
+ @param src_dir: source directory to do the diff.
+ @param dest_dir: target directory to store new data of src_dir.
+
+ """
+ if self.keep_file_hierarchy:
+ dir = src_dir.lstrip('/')
+ if self.append_diff_in_name:
+ dir = dir.rstrip('/') + '_diff'
+ dest_dir = os.path.join(dest_dir, dir)
+
+ if not os.path.exists(dest_dir):
+ os.makedirs(dest_dir)
+
+ for src_file in self._get_all_files(src_dir):
+ self._copy_new_data_in_file(src_file, src_dir, dest_dir)
+
+
+ def run(self, log_dir, collect_init_status=True):
+ """Copies new content from self.dir to the destination log_dir.
+
+ @param log_dir: The destination log directory.
+ @param collect_init_status: Set to True if run method is called to
+ collect the initial status of files.
+
+ """
+ if collect_init_status:
+ self._get_init_status_of_src_dir(self.dir)
+ elif os.path.exists(self.dir):
+ self._log_diff(self.dir, log_dir)
+
+
class purgeable_logdir(logdir):
"""Represents a log directory that will be purged."""
def __init__(self, directory, additional_exclude=None):
@@ -94,6 +241,14 @@
if not collect_corefiles:
crash_exclude_string = "*.core"
+ # This is added in before and after_iteration_loggables. When run is
+ # called in before_iteration_loggables, it collects file status in
+ # the directory. When run is called in after_iteration_loggables, diff
+ # is executed.
+ diffable_log = diffable_logdir(constants.LOG_DIR)
+ self.diffable_loggables = set()
+ self.diffable_loggables.add(diffable_log)
+
# add in some extra command logging
self.boot_loggables.add(command("ls -l /boot",
"boot_file_list"))
@@ -102,7 +257,6 @@
self.test_loggables.add(
purgeable_logdir(
os.path.join(constants.CRYPTOHOME_MOUNT_PT, "log")))
- self.test_loggables.add(logdir("/var/log"))
# We only want to gather and purge crash reports after the client test
# runs in case a client test is checking that a crash found at boot
# (such as a kernel crash) is handled.
@@ -127,6 +281,44 @@
purgeable_logdir(constants.CRASH_REPORTER_RESIDUE_DIR))
+ @log.log_and_ignore_errors("pre-test sysinfo error:")
+ def log_before_each_test(self, test):
+ """Logging hook called before a test starts.
+
+ @param test: A test object.
+ """
+ super(site_sysinfo, self).log_before_each_test(test)
+
+ for log in self.diffable_loggables:
+ log.run(log_dir=None, collect_init_status=True)
+
+
+ @log.log_and_ignore_errors("post-test sysinfo error:")
+ def log_after_each_test(self, test):
+ """Logging hook called after a test finishes.
+
+ @param test: A test object.
+ """
+ super(site_sysinfo, self).log_after_each_test(test)
+
+ test_sysinfodir = self._get_sysinfodir(test.outputdir)
+ for log in self.diffable_loggables:
+ log.run(log_dir=test_sysinfodir, collect_init_status=False)
+
+
+ def _get_chrome_version(self):
+ """Gets the Chrome version number as a string.
+
+ @return The current Chrome version number as a string. It is specified
+ in format "X.X.X.X" if it can be parsed in that format, otherwise
+ it is specified as the full output of "chrome --version".
+
+ """
+ version_string = utils.system_output(self._CHROME_VERSION_COMMAND)
+ match = re.search('\d+\.\d+\.\d+\.\d+', version_string)
+ return match.group(0) if match else version_string
+
+
def log_test_keyvals(self, test_sysinfodir):
keyval = super(site_sysinfo, self).log_test_keyvals(test_sysinfodir)
diff --git a/client/bin/site_sysinfo_unittest.py b/client/bin/site_sysinfo_unittest.py
new file mode 100644
index 0000000..c9078fc
--- /dev/null
+++ b/client/bin/site_sysinfo_unittest.py
@@ -0,0 +1,99 @@
+#!/usr/bin/python
+
+"""Tests for site_sysinfo."""
+
+__author__ = 'dshi@google.com (Dan Shi)'
+
+import common
+import os
+import random
+import unittest
+from autotest_lib.client.bin import site_sysinfo
+from autotest_lib.client.common_lib import autotemp
+
+
+class diffable_logdir_test(unittest.TestCase):
+ """Tests for methods in class diffable_logdir."""
+
+
+ def setUp(self):
+ """Initialize a temp directory with test files."""
+ self.tempdir = autotemp.tempdir(unique_id='diffable_logdir')
+ self.src_dir = os.path.join(self.tempdir.name, 'src')
+ self.dest_dir = os.path.join(self.tempdir.name, 'dest')
+
+ self.existing_files = ['existing_file_'+str(i) for i in range(3)]
+ self.existing_files_folder = ['', 'sub', 'sub/sub2']
+ self.existing_files_path = [os.path.join(self.src_dir, folder, f)
+ for f,folder in zip(self.existing_files,
+ self.existing_files_folder)]
+ self.new_files = ['new_file_'+str(i) for i in range(2)]
+ self.new_files_folder = ['sub', 'sub/sub3']
+ self.new_files_path = [os.path.join(self.src_dir, folder, f)
+ for f,folder in zip(self.new_files,
+ self.new_files_folder)]
+
+ # Create some file with random data in source directory.
+ for p in self.existing_files_path:
+ self.append_text_to_file(str(random.random()), p)
+
+
+ def tearDown(self):
+ """Clean up."""
+ self.tempdir.clean()
+
+
+ def append_text_to_file(self, text, file_path):
+ """Append text to the end of a file, creating it if it does not exist.
+
+ @param text: text to be appended to a file.
+ @param file_path: path to the file.
+
+ """
+ dir_name = os.path.dirname(file_path)
+ if not os.path.exists(dir_name):
+ os.makedirs(dir_name)
+ with open(file_path, 'a') as f:
+ f.write(text)
+
+
+ def test_diffable_logdir_success(self):
+ """Test the diff function to save new data from a directory."""
+ info = site_sysinfo.diffable_logdir(self.src_dir,
+ keep_file_hierarchy=False,
+ append_diff_in_name=False)
+ # Run the first time to collect file status.
+ info.run(log_dir=None, collect_init_status=True)
+
+ # Add new files to the test directory.
+ for file_name, file_path in zip(self.new_files,
+ self.new_files_path):
+ self.append_text_to_file(file_name, file_path)
+
+ # Temp file for existing_file_2, used to hold on to the inode. If the
+ # file were deleted and recreated directly, its inode might not change.
+ existing_file_2 = self.existing_files_path[2]
+ existing_file_2_tmp = existing_file_2 + '_tmp'
+ os.rename(existing_file_2, existing_file_2_tmp)
+
+ # Append data to existing file.
+ for file_name, file_path in zip(self.existing_files,
+ self.existing_files_path):
+ self.append_text_to_file(file_name, file_path)
+
+ # Remove the tmp file.
+ os.remove(existing_file_2_tmp)
+
+ # Run the second time to do diff.
+ info.run(self.dest_dir, collect_init_status=False)
+
+ # Validate files in dest_dir.
+ for file_name, file_path in zip(self.existing_files+self.new_files,
+ self.existing_files_path+self.new_files_path):
+ file_path = file_path.replace('src', 'dest')
+ with open(file_path, 'r') as f:
+ self.assertEqual(file_name, f.read())
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/client/cros/constants.py b/client/cros/constants.py
index eff1402..995df90 100644
--- a/client/cros/constants.py
+++ b/client/cros/constants.py
@@ -7,7 +7,7 @@
# Constants used by other constants.
USER_DATA_DIR = '/home/chronos'
WHITELIST_DIR = '/var/lib/whitelist'
-
+LOG_DIR = '/var/log'
# Rest of constants.
BROWSER = 'chrome'