blob: 887e2bf94131515e53b81bc9dfb57b6494c3d42c [file] [log] [blame]
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import throttler_lib
import utils_lib
# File extensions that can be safely shrunk.
# Extension matching is case-insensitive but the items in this set must be
# lowercase to match.
# Files without an extension and with no alphabetic characters in the extension
# (e.g. file.20201110) are always shrinkable.
SHRINKABLE_EXTENSIONS = frozenset([
        '.log',
        '.txt',
        '.debug',
        '.error',
        '.info',
        '.warning',
])

# Regex for paths that should not be shrunk.
# Raw strings are used so regex escapes like \. are not interpreted as
# (invalid) string escape sequences, which newer Python versions warn about.
UNSHRINKABLE_PATH_PATTERNS = [
        # Files in a log_diff/ directory should already be relatively small,
        # and trimming them further would be detrimental to debugging. If
        # they're too large, let other throttlers (e.g., zip_file_ or
        # delete_file_) deal with them.
        # Only blocklist a few known-useful log_diff's.
        r'/log_diff/messages$',
        r'/log_diff/net\.log$',
        # Ramoops files are small but relatively important.
        # The name of this file has changed starting with linux-3.19.
        # Use a glob to match all existing records.
        r'/console-ramoops.*',
]

# Header line stamped at the top of a file the first time it is trimmed.
TRIMMED_FILE_HEADER = '!!! This file is trimmed !!!\n'
# Template for recording the pre-trim size just below the header.
ORIGINAL_SIZE_TEMPLATE = 'Original size: %d bytes\n\n'
# Regex pattern to retrieve the original size of the file.
ORIGINAL_SIZE_REGEX = r'Original size: (\d+) bytes'
# Marker injected where the middle of the file was removed; %d is replaced
# with the number of characters trimmed.
TRIMMED_FILE_INJECT_TEMPLATE = """
========================================================================
< %d > characters are trimmed here.
========================================================================
"""

# Percent of file content to keep at the beginning and end of the file, default
# to 20%.
HEAD_SIZE_PERCENT = 0.20

# Default size in byte to trim the file down to.
DEFAULT_FILE_SIZE_LIMIT_BYTE = 100 * 1024
def _trim_file(file_info, file_size_limit_byte):
    """Remove the file content in the middle to reduce the file size.

    The trimmed copy keeps a head portion (HEAD_SIZE_PERCENT of the remaining
    byte budget) from the start of the original file, injects a marker noting
    how many characters were removed, then fills the rest of the budget with
    the tail of the original file. On first trim a header and the original
    size are stamped at the top; on a re-trim the previously recorded size is
    reused so offsets stay relative to the true original file.

    @param file_info: A ResultInfo object containing summary for the file to be
            shrunk.
    @param file_size_limit_byte: Maximum file size in bytes after trimming.
    """
    utils_lib.LOG('Trimming file %s to reduce size from %d bytes to %d bytes' %
                  (file_info.path, file_info.original_size,
                   file_size_limit_byte))
    # Build the trimmed copy next to the original; it is swapped in below
    # only after it has been fully written.
    new_path = os.path.join(os.path.dirname(file_info.path),
                            file_info.name + '_trimmed')
    original_size_bytes = file_info.original_size
    # NOTE(review): both files are opened in text mode, yet tell() results are
    # mixed with byte counts and a relative seek from os.SEEK_END is used
    # below; on Python 3 text-mode files reject non-zero SEEK_END offsets --
    # confirm the interpreter/usage this module targets.
    with open(new_path, 'w') as new_file, open(file_info.path) as old_file:
        # Read the beginning part of the old file, if it's already started with
        # TRIMMED_FILE_HEADER, no need to add the header again.
        header = old_file.read(len(TRIMMED_FILE_HEADER))
        if header != TRIMMED_FILE_HEADER:
            # First trim: stamp the header and record the original size so a
            # later re-trim can recover it.
            new_file.write(TRIMMED_FILE_HEADER)
            new_file.write(ORIGINAL_SIZE_TEMPLATE % file_info.original_size)
        else:
            # Already trimmed before: recover the pre-trim size from the line
            # following the header.
            line = old_file.readline()
            match = re.match(ORIGINAL_SIZE_REGEX, line)
            if match:
                original_size_bytes = int(match.group(1))
        header_size_bytes = new_file.tell()
        # Move old file reader to the beginning of the file.
        old_file.seek(0, os.SEEK_SET)
        # Copy the head: HEAD_SIZE_PERCENT of the budget left after the
        # header.
        new_file.write(old_file.read(
                int((file_size_limit_byte - header_size_bytes) *
                    HEAD_SIZE_PERCENT)))
        # Position to seek from the end of the file.
        seek_pos = -(file_size_limit_byte - new_file.tell() -
                     len(TRIMMED_FILE_INJECT_TEMPLATE))
        bytes_to_skip = original_size_bytes + seek_pos - old_file.tell()
        # Adjust seek position based on string TRIMMED_FILE_INJECT_TEMPLATE:
        # the rendered marker is longer than the raw template by
        # len(str(bytes_to_skip)) minus the 2 characters of '%d'.
        seek_pos += len(str(bytes_to_skip)) - 2
        bytes_to_skip = original_size_bytes + seek_pos - old_file.tell()
        new_file.write(TRIMMED_FILE_INJECT_TEMPLATE % bytes_to_skip)
        # Copy the tail of the original file into the remaining budget.
        old_file.seek(seek_pos, os.SEEK_END)
        new_file.write(old_file.read())
    # Capture timestamps before deleting so the replacement file can keep
    # them, making the swap transparent to consumers of mtime/atime.
    stat = os.stat(file_info.path)
    if not throttler_lib.try_delete_file_on_disk(file_info.path):
        # Clean up the intermediate file.
        throttler_lib.try_delete_file_on_disk(new_path)
        utils_lib.LOG('Failed to shrink %s' % file_info.path)
        return
    os.rename(new_path, file_info.path)
    # Modify the new file's timestamp to the old one.
    os.utime(file_info.path, (stat.st_atime, stat.st_mtime))
    # Update the trimmed_size.
    file_info.trimmed_size = file_info.size
def _get_shrinkable_files(file_infos, file_size_limit_byte):
    """Filter the files that can be throttled.

    A file is shrinkable when its extension is allowlisted (or contains no
    alphabetic characters), its path matches no blocklisted pattern, and its
    current size exceeds the given limit.

    @param file_infos: A list of ResultInfo objects.
    @param file_size_limit_byte: Minimum file size in bytes to be throttled.
    @yield: ResultInfo objects that can be shrunk.
    """
    for info in file_infos:
        ext = os.path.splitext(info.name)[1].lower()
        # If ext contains alphabetic characters and is not in the allowlist,
        # skip the file.
        # islower() returns False if the string does not contain any
        # alphabetic characters, e.g. '.20201110'.islower() is False, so
        # purely numeric extensions are always considered shrinkable.
        if ext.islower() and ext not in SHRINKABLE_EXTENSIONS:
            continue
        # Never shrink files whose path matches a blocklisted pattern.
        if any(re.search(pattern, info.path)
               for pattern in UNSHRINKABLE_PATH_PATTERNS):
            continue
        # Already at or under the limit -- nothing to trim.
        if info.trimmed_size <= file_size_limit_byte:
            continue
        yield info
def throttle(summary, max_result_size_KB,
             file_size_limit_byte=DEFAULT_FILE_SIZE_LIMIT_BYTE,
             skip_autotest_log=False):
    """Throttle the files in summary by trimming file content.

    Files are processed in throttling order; after each trim the total result
    size is re-checked, and processing stops as soon as it drops under
    max_result_size_KB (or when every candidate has been trimmed).

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    @param file_size_limit_byte: Limit each file's size in the summary to be
            under the given threshold, until all files are processed or the
            result size is under the given max_result_size_KB.
    @param skip_autotest_log: True to skip shrink Autotest logs, default is
            False.
    """
    sorted_files, _ = throttler_lib.sort_result_files(summary)
    # Optionally protect Autotest logs from being shrunk.
    if skip_autotest_log:
        extra_patterns = [throttler_lib.AUTOTEST_LOG_PATTERN]
    else:
        extra_patterns = []
    throttleable = throttler_lib.get_throttleable_files(sorted_files,
                                                        extra_patterns)
    for candidate in _get_shrinkable_files(throttleable,
                                           file_size_limit_byte):
        _trim_file(candidate, file_size_limit_byte)
        # Stop early once the overall result size is within budget.
        if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
            return