#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script to redact apparent ICF'ed symbolsfrom textual AFDO profiles.
AFDO sampling and ICF have an unfortunate interaction that causes a huge
inflation in sample counts. Essentially, if you have N functions ICF'ed to the
same location, one AFDO sample in any of those N functions will count as one
sample in *each* of those N functions.
In practice, there are a few forms of function bodies that are very heavily
ICF'ed (e.g. `ret`, `xor %eax, %eax; ret`, destructors for widely-used types
like std::map...). Recording 28,000 samples across all N thousand logical
functions that point to the same body really hurts our AFDO numbers, given that
our actual sample count across all of Chrome is something around 10,000,000.
(No, really, these are actual numbers. In practice, at the time of writing,
this script eliminates >90% of our AFDO samples by count. Sometimes as high as
98%.)
It reads a textual AFDO profile from stdin, and prints a 'fixed' version of it
to stdout. A summary of what the script actually did is printed to stderr.
"""
from __future__ import division, print_function

import collections
import re
import sys

def _count_samples(samples):
  """Count the total number of samples in a function."""
  line_re = re.compile(r'^(\s*)\d+(?:\.\d+)?: (\d+)\s*$')

  top_level_samples = 0
  all_samples = 0
  for line in samples:
    m = line_re.match(line)
    if not m:
      continue

    spaces, n = m.groups()
    n = int(n)
    all_samples += n
    if len(spaces) == 1:
      top_level_samples += n

  return top_level_samples, all_samples
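
# A quick sketch of _count_samples on hand-written sample lines (the profile
# format is described below; one leading space marks a top-level sample):
#
#   _count_samples([' 6: 83', ' 15: 126', '  1: 206'])
#
# would return (209, 415): the two one-space lines are top-level (83 + 126),
# while all three numeric lines count toward the overall total.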

# A ProfileRecord is a set of samples for a top-level symbol in a textual AFDO
# profile. function_line is the top line of said function, and `samples` is
# a list of all of the sample lines below that.
#
# We rely on the format of these strings in some places in this script. For
# reference, a full function sample will look something like:
#
#  _ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185
#   6: 83
#   15: 126
#   62832: 126
#   6: _ZNK5blink10PaintLayer14GroupedMappingEv:2349
#    1: 206
#    1: _ZNK5blink10PaintLayer14GroupedMappersEv:2060
#     1: 206
#   11: _ZNK5blink10PaintLayer25GetCompositedLayerMappingEv:800
#    2.1: 80
#
# In that case, function_line is
# '_ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185', and samples will be
# every line below that.
#
# Function lines look like:
#   function_symbol:entry_count:dont_care
#
# And samples look like one of:
#   arbitrary_number: sample_count
#   arbitrary_number: inlined_function_symbol:inlined_entry_count
ProfileRecord = collections.namedtuple('ProfileRecord',
                                       ['function_line', 'samples'])
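# For illustration, the example above would come out of the parser roughly as
# (later samples elided):
#   ProfileRecord(
#       function_line='_ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185',
#       samples=(' 6: 83', ' 15: 126', ...))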

def _normalize_samples(samples):
  """Normalizes the samples in the given function body.

  Normalization just means that we redact inlined function names. This is
  done so that a bit of templating doesn't make two function bodies look
  distinct. Namely:

    template <typename T>
    __attribute__((noinline))
    int getNumber() { return 1; }

    template <typename T>
    __attribute__((noinline))
    int getNumberIndirectly() { return getNumber<T>(); }

    int main() {
      return getNumber<int>() + getNumber<float>();
    }

  If the profile has the mangled name for getNumber<float> in
  getNumberIndirectly<float> (and similar for <int>), we'll consider them to
  be distinct when they're not.
  """
  # I'm not actually sure if this ends up being an issue in practice, but it's
  # simple enough to guard against.
  inlined_re = re.compile(r'(^\s*\d+): [^:]+:(\s*\d+)\s*$')

  result = []
  for s in samples:
    m = inlined_re.match(s)
    if m:
      result.append('%s: __REDACTED__:%s' % m.groups())
    else:
      result.append(s)
  return tuple(result)
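
# For example (the sample lines are hypothetical but follow the format above):
#   _normalize_samples([' 1: 206', ' 1: _ZN3foo9getNumberEv:2060'])
# returns (' 1: 206', ' 1: __REDACTED__:2060'), so two bodies that differ only
# in which specialization they inline compare equal.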

def _read_textual_afdo_profile(stream):
  """Parses an AFDO profile from a line stream into ProfileRecords."""
  # ProfileRecords are actually nested, due to inlining. For the purpose of
  # this script, that doesn't matter.
  lines = (line.rstrip() for line in stream)
  function_line = None
  samples = []
  for line in lines:
    if not line:
      continue

    if line[0].isspace():
      assert function_line is not None, 'sample exists outside of a function?'
      samples.append(line)
      continue

    if function_line is not None:
      yield ProfileRecord(function_line=function_line, samples=tuple(samples))
    function_line = line
    samples = []

  if function_line is not None:
    yield ProfileRecord(function_line=function_line, samples=tuple(samples))
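
# A small usage sketch (the symbols are hypothetical): streaming the lines
#   _Zfoo:100:10
#    1: 50
#   _Zbar:100:10
#    1: 50
# through _read_textual_afdo_profile yields two records, the first being
# ProfileRecord(function_line='_Zfoo:100:10', samples=(' 1: 50',)).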

# The default of 100 is arbitrarily selected, but it does make the overwhelming
# majority of obvious sample duplication disappear.
#
# We briefly experimented with an nm-powered version of this script (rather
# than structural matching, we'd see which functions mapped to the same literal
# address). Running this with a high value (100) for max_repeats produced
# results basically indistinguishable from nm, so ...
#
# Non-nm based approaches are superior because they don't require any prior
# build artifacts; just an AFDO profile.
def dedup_records(profile_records, summary_file, max_repeats=100):
  """Removes heavily duplicated records from profile_records.

  profile_records is expected to be an iterable of ProfileRecord.
  max_repeats is how many functions must share identical bodies for us to
  consider them 'heavily duplicated' and remove the records.
  """
  # Build a mapping of function structure -> list of functions with identical
  # structure and sample counts.
  counts = collections.defaultdict(list)
  for record in profile_records:
    counts[_normalize_samples(record.samples)].append(record)

  # Be sure that we didn't see any duplicate functions, since that's bad...
  total_functions_recorded = sum(len(records) for records in counts.values())
  unique_function_names = {
      record.function_line.split(':')[0]
      for records in counts.values()
      for record in records
  }
  assert len(unique_function_names) == total_functions_recorded, \
      'duplicate function names?'

  num_kept = 0
  num_samples_kept = 0
  num_top_samples_kept = 0
  num_total = 0
  num_samples_total = 0
  num_top_samples_total = 0

  for normalized_samples, records in counts.items():
    top_sample_count, all_sample_count = _count_samples(normalized_samples)
    top_sample_count *= len(records)
    all_sample_count *= len(records)

    num_total += len(records)
    num_samples_total += all_sample_count
    num_top_samples_total += top_sample_count

    if len(records) >= max_repeats:
      continue

    num_kept += len(records)
    num_samples_kept += all_sample_count
    num_top_samples_kept += top_sample_count
    for record in records:
      yield record

  print(
      'Retained {:,}/{:,} functions'.format(num_kept, num_total),
      file=summary_file)
  print(
      'Retained {:,}/{:,} samples, total'.format(num_samples_kept,
                                                 num_samples_total),
      file=summary_file)
  print('Retained {:,}/{:,} top-level samples'
        .format(num_top_samples_kept, num_top_samples_total),
        file=summary_file)
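
# To sketch the intended effect: with max_repeats=2, three records whose
# normalized bodies are identical are all dropped (their samples show up only
# in the 'total' side of the summary), while a record with a unique body is
# passed through untouched.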

def run(profile_input_file, summary_output_file, profile_output_file):
  profile_records = _read_textual_afdo_profile(profile_input_file)

  # Sort this so we get deterministic output. AFDO doesn't care what order
  # it's in.
  deduped = sorted(
      dedup_records(profile_records, summary_output_file),
      key=lambda r: r.function_line)
  for function_line, samples in deduped:
    print(function_line, file=profile_output_file)
    print('\n'.join(samples), file=profile_output_file)


def _main():
  run(profile_input_file=sys.stdin,
      summary_output_file=sys.stderr,
      profile_output_file=sys.stdout)


if __name__ == '__main__':
  _main()
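
# Typical invocation (assuming this file is saved as redact_profile.py and is
# executable):
#   ./redact_profile.py < input_profile.txt > redacted_profile.txt
# The retention summary lands on stderr.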