TLB counter verification tests - corrections.
BUG=None
TEST=Run on wolf board
Change-Id: I932b3dc8c72c84a95e90c7d018bc388d6a4560f3
Reviewed-on: https://chromium-review.googlesource.com/293003
Commit-Ready: Nemanja Vasić <nvasic@google.com>
Tested-by: Nemanja Vasić <nvasic@google.com>
Reviewed-by: David Sharp <dhsharp@google.com>
diff --git a/client/site_tests/hardware_PerfCounterVerification/control b/client/site_tests/hardware_PerfCounterVerification/control
index 0576ffd..8c79ac4 100644
--- a/client/site_tests/hardware_PerfCounterVerification/control
+++ b/client/site_tests/hardware_PerfCounterVerification/control
@@ -17,10 +17,21 @@
Arguments:
events: Events to pass to perf stat -e. Events are passed in together, so
multiplexing may occur if more than one is specified.
+ program: Name of the benchmark binary to run, looked up in the test's src dir.
"""
job.run_test('hardware_PerfCounterVerification', tag='cycles_instructions',
- perf_cmd='stat', events=('cycles', 'instructions'))
+ perf_cmd='stat', events=('cycles', 'instructions'),
+ program='noploop', multiplier=10000000)
job.run_test('hardware_PerfCounterVerification', tag='LBR',
- perf_cmd='record -b', events=('br_inst_retired.all_branches',))
+ perf_cmd='record -b', events=('br_inst_retired.all_branches',),
+ program='noploop', multiplier=10000000)
+
+job.run_test('hardware_PerfCounterVerification', tag='iTLB_misses',
+ perf_cmd='stat', events=('iTLB-misses','cycles'),
+ program='iTLB_benchmark', multiplier=100)
+
+job.run_test('hardware_PerfCounterVerification', tag='dTLB_misses',
+ perf_cmd='stat', events=('dTLB-misses','cycles'),
+ program='dTLB_benchmark', multiplier=10000)
diff --git a/client/site_tests/hardware_PerfCounterVerification/hardware_PerfCounterVerification.py b/client/site_tests/hardware_PerfCounterVerification/hardware_PerfCounterVerification.py
index 4043dea..a98cbee 100644
--- a/client/site_tests/hardware_PerfCounterVerification/hardware_PerfCounterVerification.py
+++ b/client/site_tests/hardware_PerfCounterVerification/hardware_PerfCounterVerification.py
@@ -27,9 +27,11 @@
For cycles and instructions, we expect a strong correlation between
the number of iterations of a "noploop" program and the number of
- cycles and instructions. That is, each loop iteration should retire
- a constant number of additional instructions, and should take a
- nearly constant number of additional cycles.
+ cycles and instructions. For TLB misses, we expect a strong correlation
+ between the number of misses and the number of iterations of a matching
+ benchmark. Each loop iteration should retire a constant number of
+ additional instructions, and should take a nearly constant number of
+ additional cycles or misses.
"""
version = 1
@@ -55,16 +57,16 @@
if board in unsupported_boards:
raise error.TestNAError('Unsupported board')
- def run_once(self, **kwargs):
- noploop = os.path.join(self.srcdir, 'noploop')
+ def run_once(self, program, multiplier, **kwargs):
+ program = os.path.join(self.srcdir, program)
if self.perf_cmd == 'stat':
self.facts = perf_verification.GatherPerfStats(
- noploop, ','.join(self.events))
+ program, ','.join(self.events), multiplier)
elif self.perf_cmd == 'record -b':
branch = perf_lbr_verification.ReadBranchAddressesFile(
os.path.join(self.srcdir, 'noploop_branch.txt'))
self.facts = perf_lbr_verification.GatherPerfBranchSamples(
- noploop, branch, ','.join(self.events),
+ program, branch, ','.join(self.events),
10000)
else:
raise error.TestError('Unrecognized perf_cmd')
@@ -79,14 +81,22 @@
('branch_count', numpy.int)])
arr = stats_utils.FactsToNumpyArray(self.facts, dt)
results = {}
+ is_tlb_benchmark = ('iTLB-misses' in dt.names or
+ 'dTLB-misses' in dt.names)
for y_var in dt.names:
if y_var == 'loops': continue
+ if y_var == 'cycles' and is_tlb_benchmark: continue
(slope, intercept), r2 = stats_utils.LinearRegression(
arr['loops'], arr[y_var])
prefix = y_var + '_'
results[prefix+'slope'] = slope
results[prefix+'intercept'] = intercept
results[prefix+'r_squared'] = r2
+ if y_var in ('dTLB-misses', 'iTLB-misses'):
+ misses_per_milion_cycles = [x[y_var] * 1.0e6 / x['cycles']
+ for x in self.facts]
+ rvar = prefix+'misses_per_milion_cycles'
+ results[rvar] = numpy.max(misses_per_milion_cycles)
self.write_perf_keyval(results)
@@ -97,13 +107,18 @@
else:
cycles_r_squared_expectation = 0.999
- if ('cycles' in self.events and
+ if ('cycles' in self.events and not is_tlb_benchmark and
results['cycles_r_squared'] < cycles_r_squared_expectation):
raise error.TestFail('Poor correlation for cycles ~ loops')
if ('instructions' in self.events and
results['instructions_r_squared'] < 0.999999):
raise error.TestFail('Poor correlation for instructions ~ loops')
-
+ if ('iTLB-misses' in self.events and
+ results['iTLB-misses_r_squared'] < 0.999):
+ raise error.TestFail('Poor correlation for iTLB-misses ~ loops')
+ if ('dTLB-misses' in self.events and
+ results['dTLB-misses_r_squared'] < 0.999):
+ raise error.TestFail('Poor correlation for dTLB-misses ~ loops')
if (self.perf_cmd == 'record -b' and
results['branch_count_r_squared'] < 0.9999999):
raise error.TestFail('Poor correlation for branch_count ~ loops')
diff --git a/client/site_tests/hardware_PerfCounterVerification/perf_verification.py b/client/site_tests/hardware_PerfCounterVerification/perf_verification.py
index 05ebbc4..dbc033c 100755
--- a/client/site_tests/hardware_PerfCounterVerification/perf_verification.py
+++ b/client/site_tests/hardware_PerfCounterVerification/perf_verification.py
@@ -16,12 +16,14 @@
"""Module error class."""
-def GatherPerfStats(noploop, events, progress_func=lambda i, j: None):
- """Run perf stat with the given events and noploop program.
+def GatherPerfStats(program, events, multiplier=1000,
+ progress_func=lambda i, j: None):
+ """Run perf stat with the given events and given program.
- @param noploop: path to noploop binary. It should take one argument (number
- of loop iterations) and produce no output.
+ @param program: path to benchmark binary. It should take one argument
+ (number of loop iterations) and produce no output.
@param events: value to pass to '-e' arg of perf stat.
+ @param multiplier: loop-count step; run i uses (i+1) * multiplier loops.
@param progress_func: function that tracks progress of running the
benchmark. takes two arguments for the outer and inner iteration
numbers.
@@ -30,11 +32,11 @@
facts = []
for i, j in itertools.product(xrange(10), xrange(5)):
progress_func(i, j)
- loops = (i+1) * 10000000 # (i+1) * 10 million
+ loops = (i+1) * multiplier
out = subprocess.check_output(
('perf', 'stat', '-x', ',',
'-e', events,
- noploop, '%d' % loops),
+ program, '%d' % loops),
stderr=subprocess.STDOUT)
unsupported_events = []
f = {'loops': loops}
@@ -74,7 +76,7 @@
events = ('cycles', 'instructions')
facts = GatherPerfStats('src/noploop', ','.join(events),
- progress_func=_Progress)
+ multiplier=10*1000*1000, progress_func=_Progress)
dt = numpy.dtype([('loops', numpy.int)] +
[(e, numpy.int) for e in events])
diff --git a/client/site_tests/hardware_PerfCounterVerification/src/Makefile b/client/site_tests/hardware_PerfCounterVerification/src/Makefile
index 820badb..b96642f 100644
--- a/client/site_tests/hardware_PerfCounterVerification/src/Makefile
+++ b/client/site_tests/hardware_PerfCounterVerification/src/Makefile
@@ -1,6 +1,8 @@
CFLAGS=-O0 -g
-OUTPUTS=noploop noploop_branch.txt
+BINS=iTLB_benchmark dTLB_benchmark noploop
+OBJS=iTLB_benchmark.o dTLB_benchmark.o iTLB_benchmark_function.o
+OUTPUTS=$(BINS) $(OBJS) iTLB_benchmark_function.c noploop_branch.txt
all: $(OUTPUTS)
@@ -9,5 +11,12 @@
noploop_branch.txt: noploop
./find_loop_instructions.py $< > $@
+iTLB_benchmark: iTLB_benchmark.o iTLB_benchmark_function.o
+
+dTLB_benchmark: dTLB_benchmark.o
+
+iTLB_benchmark_function.c: generateBenchmarkFunction.sh
+ ./generateBenchmarkFunction.sh > iTLB_benchmark_function.c
+
clean:
rm -rf $(OUTPUTS)
diff --git a/client/site_tests/hardware_PerfCounterVerification/src/dTLB_benchmark.c b/client/site_tests/hardware_PerfCounterVerification/src/dTLB_benchmark.c
new file mode 100644
index 0000000..fc55836
--- /dev/null
+++ b/client/site_tests/hardware_PerfCounterVerification/src/dTLB_benchmark.c
@@ -0,0 +1,58 @@
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * dTLB benchmark: allocates argv[1] page-sized heap blocks (default 100,
+ * minimum 1) and reads the first byte of each block. Produces no output.
+ * Returns 0 on success, or 1 if any allocation fails.
+ */
+int main(int argc, char *argv[]) {
+  unsigned long i, allocated = 0, block_cnt = 100;
+  char** blocks;
+  long page_size;
+  int status = 0;
+
+  page_size = sysconf(_SC_PAGESIZE);
+  if (page_size == -1) {
+    page_size = (1 << 12);  // fall back to 4 KiB
+  }
+
+  if (argc > 1) {
+    block_cnt = strtoul(argv[1], NULL, 10);
+    if (block_cnt < 1) {
+      block_cnt = 1;
+    }
+  }
+
+  blocks = (char**) malloc(block_cnt * sizeof(char*));
+  if (blocks == NULL) {
+    return 1;
+  }
+
+  for (i = 0; i < block_cnt; i++) {
+    // A throwaway page-sized block is allocated before every real block and
+    // freed right after it; the stated intent is "forcing fragmentation".
+    char* dummy_ptr = (char*) malloc(page_size * sizeof(char));
+    blocks[i] = (char*) malloc(page_size * sizeof(char));
+    free(dummy_ptr);  // free(NULL) is a no-op, so a failed dummy is harmless
+    if (blocks[i] == NULL) {
+      status = 1;  // stop early; only the blocks counted so far are freed
+      break;
+    }
+    allocated++;
+  }
+
+  if (status == 0) {
+    for (i = 0; i < block_cnt; i++) {
+      // Volatile access so the load is kept whatever CFLAGS are used.
+      (void) *(volatile char*) blocks[i];
+    }
+  }
+
+  for (i = 0; i < allocated; i++) {
+    free(blocks[i]);
+  }
+  free(blocks);
+
+  return status;
+}
diff --git a/client/site_tests/hardware_PerfCounterVerification/src/generateBenchmarkFunction.sh b/client/site_tests/hardware_PerfCounterVerification/src/generateBenchmarkFunction.sh
new file mode 100755
index 0000000..0b21f6f
--- /dev/null
+++ b/client/site_tests/hardware_PerfCounterVerification/src/generateBenchmarkFunction.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# This script prints the C source of one very large function, intended to
+# cause as many iTLB misses as possible when it is called repeatedly.
+
+# Number of statements emitted (2 per loop pass x 2^18 passes = 4k x 64 x 2):
+# 4k - page size
+# x 64 - assumed number of TLB entries
+# x 2 - executing a function sized page_size * tlb_entry_count multiple
+# times would cause TLB misses only on the first call, and the TLB entries
+# would stay valid for each later call. Doubling the size of the function
+# is intended to evict TLB entries between calls and so keep causing misses.
+
+echo "void iTLB_bechmark_function() {"
+echo " int a = 0, b = 0;"
+
+for (( c=0; c < (1 << 18) ; c++ )) ; do
+ echo " a = b + 1;"
+ echo " b = a + 1;"
+done
+
+echo "}"
diff --git a/client/site_tests/hardware_PerfCounterVerification/src/iTLB_benchmark.c b/client/site_tests/hardware_PerfCounterVerification/src/iTLB_benchmark.c
new file mode 100644
index 0000000..7809f44
--- /dev/null
+++ b/client/site_tests/hardware_PerfCounterVerification/src/iTLB_benchmark.c
@@ -0,0 +1,24 @@
+#include <stdlib.h>
+#include "iTLB_benchmark_function.h"
+
+/*
+ * iTLB benchmark: calls the generated iTLB_bechmark_function() argv[1]
+ * times (default 1000, minimum 1). Produces no output and returns 0.
+ */
+int main(int argc, char *argv[]) {
+  unsigned long loops = 1000;
+  if (argc > 1) {
+    loops = strtoul(argv[1], NULL, 10);
+    if (loops < 1) {
+      loops = 1;
+    }
+  }
+
+  // Post-decrement so the body runs exactly `loops` times; a pre-decrement
+  // would run it loops - 1 times, and not at all for the minimum of 1.
+  while (loops--) {
+    iTLB_bechmark_function();
+  }
+
+  return 0;
+}
diff --git a/client/site_tests/hardware_PerfCounterVerification/src/iTLB_benchmark_function.h b/client/site_tests/hardware_PerfCounterVerification/src/iTLB_benchmark_function.h
new file mode 100644
index 0000000..dd31701
--- /dev/null
+++ b/client/site_tests/hardware_PerfCounterVerification/src/iTLB_benchmark_function.h
@@ -0,0 +1 @@
+void iTLB_bechmark_function();