blob: f682b56675673c65833579741b2e8c05305e3a67 [file] [log] [blame]
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This throttler tries to remove the remove repeated files sharing the same
prefix, for example, screenshots or dumps in the same folder. The dedupe logic
does not compare the file content, instead, it sorts the files with the same
prefix and remove files in the middle part.
"""
import os
import re
import result_info_lib
import throttler_lib
import utils_lib
# Number of files to keep for the oldest files.
OLDEST_FILES_TO_KEEP_COUNT = 2
# Number of files to keep for the newest files.
NEWEST_FILES_TO_KEEP_COUNT = 1
# Files with path matching following patterns should not be deduped.
# Raw strings are used so `\d` is passed to the regex engine verbatim, and
# literal dots are escaped so e.g. `perf.data` does not match `perfXdata`.
NO_DEDUPE_FILE_PATTERNS = [
    'debug/.*',
    r'.*perf\.data$',  # Performance test data.
    '.*/debug/.*',
    r'.*dir_summary_\d+\.json',
]
# Regex pattern to get the prefix of a file name: the leading run of
# letters, underscores and dashes.
PREFIX_PATTERN = '([a-zA-Z_-]*).*'
def _group_by(file_infos, keys):
"""Group the file infos by the given keys.
@param file_infos: A list of ResultInfo objects.
@param keys: A list of names of the attribute to group the file infos by.
@return: A dictionary of grouped_key: [ResultInfo].
"""
grouped_infos = {}
for info in file_infos:
key_values = []
for key in keys:
key_values.append(getattr(info, key))
grouped_key = os.sep.join(key_values)
if grouped_key not in grouped_infos:
grouped_infos[grouped_key] = []
grouped_infos[grouped_key].append(info)
return grouped_infos
def _dedupe_files(summary, file_infos, max_result_size_KB):
    """De-duplicate a group of same-prefix files and update the summary.

    The files are ordered by last-modification time; the oldest
    OLDEST_FILES_TO_KEEP_COUNT and the newest NEWEST_FILES_TO_KEEP_COUNT
    entries are kept and the ones in between are deleted. Deletion stops
    early once the result size drops under the throttle limit.

    @param summary: A ResultInfo object containing result summary.
    @param file_infos: A list of ResultInfo objects to be de-duplicated.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    # Sort in place by modification time so slicing keeps the oldest and
    # newest files of the group.
    file_infos.sort(
            key=lambda info: result_info_lib.get_last_modification_time(
                    info.path))
    candidates = file_infos[
            OLDEST_FILES_TO_KEEP_COUNT:-NEWEST_FILES_TO_KEEP_COUNT]
    for candidate in candidates:
        if not throttler_lib.try_delete_file_on_disk(candidate.path):
            continue
        candidate.trimmed_size = 0
        # Stop as soon as the result size is within the limit.
        if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
            return
def throttle(summary, max_result_size_KB):
    """Throttle the files in summary by de-duplicating files.

    Processing stops once all files have been examined or the result size
    has been reduced to be under the given max_result_size_KB.

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    _, grouped_files = throttler_lib.sort_result_files(summary)
    for pattern in throttler_lib.RESULT_THROTTLE_PRIORITY:
        throttleable = list(throttler_lib.get_throttleable_files(
                grouped_files[pattern], NO_DEDUPE_FILE_PATTERNS))
        # Tag each file with its directory and file-name prefix so files
        # can be grouped per (directory, prefix) pair.
        for entry in throttleable:
            entry.parent_dir = os.path.dirname(entry.path)
            entry.prefix = re.match(PREFIX_PATTERN, entry.name).group(1)
        keep_count = OLDEST_FILES_TO_KEEP_COUNT + NEWEST_FILES_TO_KEEP_COUNT
        for group in _group_by(throttleable,
                               ['parent_dir', 'prefix']).values():
            if len(group) <= keep_count:
                # Too few files in this group to be worth de-duplicating.
                continue
            utils_lib.LOG('De-duplicating files in %s with the same prefix of '
                          '"%s"' % (group[0].parent_dir, group[0].prefix))
            _dedupe_files(summary, group, max_result_size_KB)
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return