#!/usr/bin/python
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
This is a utility to build a summary of the given directory. and save to a json
file.
Example usage:
result_utils.py -p path
The content of the json file looks like:
{'default': {'/D': {'control': {'/S': 734},
'debug': {'/D': {'client.0.DEBUG': {'/S': 5698},
'client.0.ERROR': {'/S': 254},
'client.0.INFO': {'/S': 1020},
'client.0.WARNING': {'/S': 242}},
'/S': 7214}
},
'/S': 7948
}
}
"""
import argparse
import copy
import glob
import json
import logging
import os
import time
# Do NOT import autotest_lib modules here. This module must be executable
# without depending on other autotest modules, which keeps the logic of result
# trimming on the server side instead of pulling in the autotest client
# module.
DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
# Minimum disk space that should remain available after saving the summary
# file.
MIN_FREE_DISK_BYTES = 10 * 1024 * 1024

# Following are key names for directory summaries. The keys start with / so
# they can be differentiated from valid file names. The short keys are
# designed to keep the file size of the directory summary small.
# Original size of the directory or file
ORIGINAL_SIZE_BYTES = '/S'
# Size of the directory or file after trimming
TRIMMED_SIZE_BYTES = '/T'
# Size of the directory or file being collected from the client side
COLLECTED_SIZE_BYTES = '/C'
# A dictionary of sub-directories' summaries: {name: directory_summary}
DIRS = '/D'
# Default root directory name. To allow summaries to be merged effectively,
# all summaries are collected with a root directory of ''.
ROOT_DIR = ''

# Autotest uses some state files to track process running state. These files
# are deleted from the test results, so they can be ignored.
FILES_TO_IGNORE = set([
        'control.autoserv.state'
])
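
# A minimal sketch (illustrative, not part of the original module): a summary
# entry for a directory `debug` holding one 5698-byte log file would look like
#     {'debug': {'/S': 5698, '/D': {'client.0.DEBUG': {'/S': 5698}}}}
# with '/T' and '/C' only added once trimming/collection sizes are computed.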


def get_unique_dir_summary_file(path):
    """Get a unique file path to save the directory summary json string.

    @param path: The directory path to save the summary file to.
    @return: A unique file path inside `path` for the summary file.
    """
    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
    # Make sure the summary file name is unique.
    file_name = os.path.join(path, summary_file)
    if os.path.exists(file_name):
        count = 1
        name, ext = os.path.splitext(summary_file)
        while os.path.exists(file_name):
            file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
            count += 1
    return file_name
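
# Example (illustrative): with time.time() == 1500000000.5 the function
# returns '<path>/dir_summary_1500000000.json'; on a name collision it falls
# back to '<path>/dir_summary_1500000000_1.json', '..._2.json', and so on.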


def get_dir_summary(path, top_dir, all_dirs=None):
    """Get the directory summary for the given path.

    @param path: The directory to collect the summary for.
    @param top_dir: The top directory to collect the summary for. This is
            used to check if a directory is a subdir of the original
            directory being summarized.
    @param all_dirs: A set of paths that have been collected already. This is
            used to prevent infinite recursion caused by symlinks.
    @return: A dictionary of the directory summary.
    """
    # Avoid a mutable default argument: a shared default set would persist
    # across top-level calls and make later scans skip everything.
    if all_dirs is None:
        all_dirs = set()
    dir_info = {}
    dir_info[ORIGINAL_SIZE_BYTES] = 0
    summary = {os.path.basename(path): dir_info}
    if os.path.isfile(path):
        dir_info[ORIGINAL_SIZE_BYTES] = os.stat(path).st_size
    else:
        dir_info[DIRS] = {}
        real_path = os.path.realpath(path)
        # The assumption here is that results are copied back to the drone by
        # copying the symlink, not its content, which is true with the rsync
        # call currently used in cros_host.get_file.
        # Skip scanning the child folders if any of the following conditions
        # is true:
        # 1. The directory is a symlink and links to a folder under `top_dir`.
        # 2. The directory was scanned already.
        if ((os.path.islink(path) and real_path.startswith(top_dir)) or
            real_path in all_dirs):
            return summary
        all_dirs.add(real_path)
        for f in sorted(os.listdir(path)):
            f_summary = get_dir_summary(os.path.join(path, f), top_dir,
                                        all_dirs)
            dir_info[DIRS][f] = f_summary[f]
            dir_info[ORIGINAL_SIZE_BYTES] += f_summary[f][ORIGINAL_SIZE_BYTES]
    return summary
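
# Example (illustrative): for a directory `results` containing only a 734-byte
# file `control`, get_dir_summary('results', top_dir='results') returns
#     {'results': {'/S': 734, '/D': {'control': {'/S': 734}}}}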


def build_summary_json(path):
    """Build a summary of the files in the given path and return it as a dict.

    @param path: The directory to build the summary for.
    @return: A dictionary of the directory summary, ready to be serialized to
            json.
    @raise IOError: If the given path doesn't exist.
    """
    if not os.path.exists(path):
        raise IOError('Path %s does not exist.' % path)
    return get_dir_summary(path, top_dir=path)
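
# Example (illustrative): json.dumps(build_summary_json('/tmp/results'))
# produces a json string shaped like the example in the module docstring.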


def _update_sizes(entry):
    """Update a directory entry's sizes.

    Values of ORIGINAL_SIZE_BYTES, TRIMMED_SIZE_BYTES and COLLECTED_SIZE_BYTES
    are re-calculated based on the files under the directory. If the entry is
    a file, skip the update.

    @param entry: A dict of a directory entry in a summary.
    """
    if DIRS not in entry:
        return
    entry[ORIGINAL_SIZE_BYTES] = sum([entry[DIRS][s][ORIGINAL_SIZE_BYTES]
                                      for s in entry[DIRS]])
    # Until trimming is implemented, COLLECTED_SIZE_BYTES and
    # TRIMMED_SIZE_BYTES fall back to the value of ORIGINAL_SIZE_BYTES.
    entry[COLLECTED_SIZE_BYTES] = sum([
            entry[DIRS][s].get(
                    COLLECTED_SIZE_BYTES,
                    entry[DIRS][s].get(TRIMMED_SIZE_BYTES,
                                       entry[DIRS][s][ORIGINAL_SIZE_BYTES]))
            for s in entry[DIRS]])
    entry[TRIMMED_SIZE_BYTES] = sum([
            entry[DIRS][s].get(TRIMMED_SIZE_BYTES,
                               entry[DIRS][s][ORIGINAL_SIZE_BYTES])
            for s in entry[DIRS]])
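
# Example (illustrative): given a directory entry
#     {'/D': {'a': {'/S': 10}, 'b': {'/S': 20, '/T': 5}}}
# _update_sizes sets '/S' to 30 and both '/T' and '/C' to 15, since entries
# without '/T' or '/C' fall back to their original size.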


def _delete_missing_entries(summary_old, summary_new):
    """Delete files/directories that only exist in the old summary.

    When the new summary is final, i.e., it's built from the final result
    directory, files or directories missing from it are considered to be
    deleted and trimmed to size 0.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    """
    # Iterate over a copy of the keys, as entries may be deleted below.
    for name in list(summary_old.keys()):
        if name not in summary_new:
            if DIRS in summary_old[name]:
                # Trim sub-directories.
                _delete_missing_entries(summary_old[name][DIRS], {})
                _update_sizes(summary_old[name])
            elif name in FILES_TO_IGNORE:
                # Remove the file from the summary as it can be ignored.
                del summary_old[name]
            else:
                # Before setting the trimmed size to 0, update the collected
                # size if it's not set yet.
                if COLLECTED_SIZE_BYTES not in summary_old[name]:
                    trimmed_size = summary_old[name].get(
                            TRIMMED_SIZE_BYTES,
                            summary_old[name][ORIGINAL_SIZE_BYTES])
                    summary_old[name][COLLECTED_SIZE_BYTES] = trimmed_size
                summary_old[name][TRIMMED_SIZE_BYTES] = 0
        elif DIRS in summary_old[name]:
            # Use an empty dict if `name` is no longer a directory in the new
            # summary, so its old content is treated as deleted.
            _delete_missing_entries(summary_old[name][DIRS],
                                    summary_new[name].get(DIRS, {}))
            _update_sizes(summary_old[name])
    _update_sizes(summary_old)
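
# Example (illustrative): if the old summary has a 100-byte file `sysinfo`
# that is absent from the final summary, its '/C' defaults to 100 and its
# '/T' is set to 0, recording bytes that were collected but later deleted.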


def _merge(summary_old, summary_new, is_final=False):
    """Merge a new directory summary into an old one.

    Update the old directory summary with the new summary. Also calculate the
    total size of results collected from the client side. The merge is done
    in place: summary_old is updated with the merged result; nothing is
    returned.

    When merging with previously collected results, any results not existing
    in the new summary, or files whose sizes differ from the newly collected
    files, are considered to be extra results collected or overwritten by the
    new results. Therefore, the size of the collected result should include
    such files, and COLLECTED_SIZE_BYTES can be larger than
    TRIMMED_SIZE_BYTES.

    As an example:
    summary_old: {'file1': {TRIMMED_SIZE_BYTES: 1000,
                            ORIGINAL_SIZE_BYTES: 1000,
                            COLLECTED_SIZE_BYTES: 1000}}
    This means a result `file1` of original size 1KB was collected with a
    size of 1KB.

    summary_new: {'file1': {TRIMMED_SIZE_BYTES: 1000,
                            ORIGINAL_SIZE_BYTES: 2000,
                            COLLECTED_SIZE_BYTES: 1000}}
    This means a result `file1` of 2KB was trimmed down to 1KB and was
    collected with a size of 1KB.

    Note that the second result collection has an updated result `file1`
    (because of the different ORIGINAL_SIZE_BYTES), and it needs to be
    rsync-ed to the drone. Therefore, the merged summary will be:
    {'file1': {TRIMMED_SIZE_BYTES: 1000,
               ORIGINAL_SIZE_BYTES: 2000,
               COLLECTED_SIZE_BYTES: 2000}}
    Note that:
    * TRIMMED_SIZE_BYTES is still 1KB, which reflects the actual size of the
      file to be collected.
    * ORIGINAL_SIZE_BYTES is updated to 2KB, which is the size of the file in
      the new result `file1`.
    * COLLECTED_SIZE_BYTES is 2KB because rsync will copy `file1` twice as it
      changed.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    @param is_final: True if summary_new is built from the final result
            folder. Default is set to False.
    """
    for name in summary_new:
        if name not in summary_old:
            # A file/dir exists in the new client dir, but not in the old
            # one, which means that the file or directory is newly collected.
            summary_old[name] = copy.deepcopy(summary_new[name])
        elif DIRS in summary_new[name]:
            # `name` is a directory in the new summary, merge the directories
            # of the old and new summaries under `name`.
            if DIRS not in summary_old[name]:
                # If `name` is a file in the old summary but a directory in
                # the new summary, the file in the old summary will be
                # overwritten by the new directory by rsync. Therefore, force
                # it to be an empty directory in the old summary, so that the
                # new directory can be merged.
                summary_old[name][ORIGINAL_SIZE_BYTES] = 0
                summary_old[name][TRIMMED_SIZE_BYTES] = 0
                summary_old[name][COLLECTED_SIZE_BYTES] = 0
                summary_old[name][DIRS] = {}
            _merge(summary_old[name][DIRS], summary_new[name][DIRS],
                   is_final)
        else:
            # `name` is a file. Compare the original sizes; if they are
            # different, the file was overwritten, so increment
            # COLLECTED_SIZE_BYTES.
            if DIRS in summary_old[name]:
                # If `name` is a directory in the old summary, but a file in
                # the new summary, rsync will fail to copy the file as it
                # can't overwrite a directory. Therefore, skip the merge.
                continue
            new_size = summary_new[name][ORIGINAL_SIZE_BYTES]
            old_size = summary_old[name][ORIGINAL_SIZE_BYTES]
            new_trimmed_size = summary_new[name].get(
                    TRIMMED_SIZE_BYTES, summary_new[name][ORIGINAL_SIZE_BYTES])
            old_trimmed_size = summary_old[name].get(
                    TRIMMED_SIZE_BYTES, summary_old[name][ORIGINAL_SIZE_BYTES])
            if new_size != old_size:
                if is_final and new_trimmed_size == old_trimmed_size:
                    # If the file is merged from the final result folder to an
                    # older summary, it's not considered to be trimmed if the
                    # size has not changed. The reason is that the file on the
                    # server side does not have the info of its original size.
                    continue
                # Until trimming is implemented, COLLECTED_SIZE_BYTES falls
                # back to the value of ORIGINAL_SIZE_BYTES.
                new_collected_size = summary_new[name].get(
                        COLLECTED_SIZE_BYTES,
                        summary_new[name].get(
                                TRIMMED_SIZE_BYTES,
                                summary_new[name][ORIGINAL_SIZE_BYTES]))
                old_collected_size = summary_old[name].get(
                        COLLECTED_SIZE_BYTES,
                        summary_old[name].get(
                                TRIMMED_SIZE_BYTES,
                                summary_old[name][ORIGINAL_SIZE_BYTES]))
                summary_old[name][COLLECTED_SIZE_BYTES] = (
                        new_collected_size + old_collected_size)
                summary_old[name][TRIMMED_SIZE_BYTES] = summary_new[name].get(
                        TRIMMED_SIZE_BYTES,
                        summary_new[name][ORIGINAL_SIZE_BYTES])
                summary_old[name][ORIGINAL_SIZE_BYTES] = new_size
        # Update COLLECTED_SIZE_BYTES and ORIGINAL_SIZE_BYTES based on the
        # merged directory summary.
        _update_sizes(summary_old[name])
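
# Worked example (illustrative), mirroring the docstring above:
#     old = {'file1': {'/S': 1000, '/T': 1000, '/C': 1000}}
#     new = {'file1': {'/S': 2000, '/T': 1000, '/C': 1000}}
#     _merge(old, new)
# updates `old` in place to {'file1': {'/S': 2000, '/T': 1000, '/C': 2000}},
# counting the re-collected file twice in '/C'.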


def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files being collected
    for the test device and the files generated on the drone. It also returns
    the merged directory summary.

    @param path: A path to search for directory summaries.
    @return: A tuple of (client_collected_bytes, merged_summary):
            client_collected_bytes: The total size of results collected from
                the DUT. The number can be larger than the total file size of
                the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
    """
    # Find all directory summary files and sort them by modification time, so
    # they are merged in the order they were collected.
    summary_files = glob.glob(os.path.join(path, 'dir_summary_*.json'))
    summary_files = sorted(summary_files, key=os.path.getmtime)

    all_summaries = []
    for summary_file in summary_files:
        with open(summary_file) as f:
            all_summaries.append(json.load(f))

    # Merge all summaries.
    merged_summary = (copy.deepcopy(all_summaries[0]) if all_summaries
                      else {})
    for summary in all_summaries[1:]:
        _merge(merged_summary, summary)
    # After all summaries from the test device (client side) are merged, we
    # can get the total size of result files transferred from the test
    # device. If no directory summary was collected, default
    # client_collected_bytes to 0.
    client_collected_bytes = 0
    if merged_summary:
        # Fall back to ORIGINAL_SIZE_BYTES if COLLECTED_SIZE_BYTES is not set
        # yet, e.g., when only a single summary file was found, since the two
        # have the same value until trimming is implemented.
        client_collected_bytes = merged_summary[ROOT_DIR].get(
                COLLECTED_SIZE_BYTES,
                merged_summary[ROOT_DIR][ORIGINAL_SIZE_BYTES])

    # Get the summary of the current directory.
    # Make sure the path ends with /, so the top directory in the summary
    # will be '', which is consistent with the other summaries.
    if not path.endswith(os.sep):
        path += os.sep
    last_summary = get_dir_summary(path, top_dir=path)
    _merge(merged_summary, last_summary, is_final=True)
    _delete_missing_entries(merged_summary, last_summary)
    return client_collected_bytes, merged_summary
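
# Example (illustrative): after a job has written one or more
# dir_summary_*.json files into `results`, the totals can be obtained with
#     collected_bytes, summary = merge_summaries('results/')
# `collected_bytes` may exceed the on-disk size of `results` if files were
# overwritten or deleted between collections.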


def main():
    """Main script to build and save a directory summary."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path', required=True,
                        help='Path to build the directory summary for.')
    options = parser.parse_args()

    summary = build_summary_json(options.path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(options.path)

    # Make sure there is enough free disk space to write the file.
    stat = os.statvfs(options.path)
    free_space = stat.f_frsize * stat.f_bavail
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise IOError('Not enough disk space after saving the summary file. '
                      'Available free disk: %s bytes. Summary file size: %s '
                      'bytes.' % (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    logging.info('Directory summary of %s is saved to file %s.', options.path,
                 summary_file)


if __name__ == '__main__':
    main()