client/site_tests/hardware_PerfCounterVerification/perf_lbr_verification.py - mirrors/cros/chromiumos/third_party/autotest - Git at Google

 #!/usr/bin/python2.7
 # Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import contextlib
 import itertools
 import os
 import os.path
 import re
 import subprocess

 import numpy

 import stats_utils


 class Error(Exception):
     """Base class for the exceptions raised by this module."""

 class TestFail(Error):
     """Raised when a test condition failed (as opposed to a tool failure)."""


 @contextlib.contextmanager
 def CleanupFile(path):
     """Context manager that deletes path on exit.

     The file is removed whether the body finishes normally or raises. A path
     that does not exist on exit (for example because the command that was
     supposed to create it failed) is ignored, so the cleanup never replaces
     the body's own exception with a "no such file" error.

     @param path: Name of the file to delete when the with-block is left.
     @raises OSError: if removing the file fails for any reason other than
         the file not existing.
     """
     import errno  # Local import: only this function needs it.

     try:
         yield
     finally:
         try:
             os.remove(path)
         except OSError as e:
             # Nothing to clean up if the file was never created.
             if e.errno != errno.ENOENT:
                 raise


 # Write-only handle on /dev/null, opened once when the module is loaded. It is
 # passed as stderr to the perf subprocesses in this file to discard their
 # diagnostics.
 DEVNULL = open('/dev/null', 'w')


 class Mmap(object):
     """Represents a memory map, and does the (un)mapping arithmetic.

     The three attributes are the integers of a "[start(length) @ pgoff]"
     entry of a PERF_RECORD_MMAP record:
         start: address at which the mapping begins.
         length: size of the mapping.
         pgoff: offset associated with the mapping, used together with start
             to translate between mapped and dso addresses.
     """

     def __init__(self, start, length, pgoff):
         self.start = start
         self.length = length
         self.pgoff = pgoff

     def __repr__(self):
         # Same "[start(length) @ pgoff]" layout as the raw dump, in hex.
         return '[%x(%x) @ %x]' % (self.start, self.length, self.pgoff)

     def Map(self, ip):
         """Turns ip from a virtual mapped address back to a dso address.

         (Frankly I think these are named backwards. This follows the naming
         convention of perf's struct map.)

         @param ip: virtual mapped address (integer).
         @returns: the corresponding dso address (integer).
         """
         # See perf's util/map.h: map__map_ip()
         return (ip + self.pgoff) - self.start

     def Unmap(self, rip):
         """Turns ip from a dso address into a virtual mapped address.

         @param rip: dso address (integer).
         @returns: the corresponding virtual mapped address (integer).
         """
         # See perf's util/map.h: map__unmap_ip()
         return self.start + (rip - self.pgoff)

     # Matches one PERF_RECORD_MMAP line of `perf report -D` output. Every
     # numeric field is either a 0x-prefixed hex number or a bare 0.
     MMAP_LINE_RE = re.compile(
             r'(?P<event_ts>\d+) '
             r'(?P<event_offset>0x[0-9a-fA-F]+|0) '
             r'[[](?P<event_size>0x[0-9a-fA-F]+|0)[]]: '
             r'PERF_RECORD_MMAP '
             r'(?P<pid>-?\d+)/(?P<tid>-?\d+): '
             r'[[]'
                 r'(?P<start>0x[0-9a-fA-F]+|0)'
                 r'[(](?P<length>0x[0-9a-fA-F]+|0)[)] @ '
                 r'(?P<pgoff>0x[0-9a-fA-F]+|0)'
             r'[]]: '
             r'((?P<executable>[rx]) )?'
             r'(?P<filename>.*)')

     @staticmethod
     def GetFromPerfData(perf_data_filename, mmap_filename):
         """Parse perf_data_filename and find how mmap_filename was mapped.

         Runs `perf report -D` on the data file and scans its output for
         PERF_RECORD_MMAP lines.

         @param perf_data_filename: perf.data filename.
         @param mmap_filename: Look for this mmap.
         @returns: Mmap object representing the first map recorded for
             mmap_filename, or None if there is no such record.
         @raises Error: if a line mentioning PERF_RECORD_MMAP does not have
             the expected format.
         """
         result = None
         raw_trace_proc = subprocess.Popen(
                 ('perf', 'report', '-D', '-i', perf_data_filename),
                 stdout=subprocess.PIPE, stderr=DEVNULL)
         try:
             for line in raw_trace_proc.stdout:
                 if 'PERF_RECORD_MMAP' not in line:
                     continue
                 match = Mmap.MMAP_LINE_RE.match(line)
                 if not match:
                     raise Error(
                             'Unexpected format for MMAP record in raw dump:\n' +
                             line)
                 if match.group('filename') == mmap_filename:
                     args = match.group('start', 'length', 'pgoff')
                     result = Mmap(*(int(x, 16) for x in args))
                     break
             for line in raw_trace_proc.stdout:
                 # Skip rest of output
                 pass
         finally:
             # Always release the pipe and reap the child, including when a
             # malformed line makes us raise part-way through the output.
             raw_trace_proc.stdout.close()
             raw_trace_proc.wait()
         return result

 # Maps symbolic event names to the raw event codes that TranslateEvents
 # substitutes for them.
 RAW_EVENT_CODES = {
     'br_inst_retired.all_branches': 'r4c4',
 }

 def TranslateEvents(events):
     """Replace event names that have a raw event code with that code.

     @param events: Iterable of event name strings.
     @returns: New list, in the same order, in which every name found in
         RAW_EVENT_CODES is replaced by its raw code and every other name is
         kept unchanged.
     """
     translated = []
     for name in events:
         translated.append(RAW_EVENT_CODES.get(name, name))
     return translated


 # Number of branch records in the LBR branch buffer.
 # This is the right value for SandyBridge, IvyBridge and Haswell, at least.
 # See Intel manual vol. 3B, 17.4.8 LBR
 # TODO: Consider detecting whether 16 is the correct branch buffer length
 # based on the uarch. However, all uarchs we run on have a 16-long buffer.
 BRANCH_BUFFER_LENGTH = 16

 def EstimateExpectedSamples(loops, count):
     """Calculate the number of SAMPLE events expected.

     ie, expect estimate * BRANCH_BUFFER_LENGTH branches to be sampled.

     Incorporates the "observer effect": includes branches caused by returning
     from PMU interrupts.

     Includes one extra sample due to alignment of samples in the series of
     branches. This sample can be expected "most" of the time, but it is not
     incorrect for it to be missing.

     All arithmetic is integer (floor) division, written with // so that the
     result does not depend on the Python version or on
     `from __future__ import division`.

     @param loops: the number of noploop branches executed (integer).
     @param count: the event sampling period. ie, a sample should be collected
         every count branches. Expected to be an integer greater than 1.
     @returns: the expected number of samples, as an integer.
     @raises ZeroDivisionError: if count is 1 (or 0).
     """
     sample_count = 1  # assume program prolog takes one sample

     all_branches = loops
     loop_samples = loops // (count - 1)
     while loop_samples >= 1:
         all_branches += loop_samples
         # compounding branches caused by samples caused by samples caused ...
         loop_samples = loop_samples // (count - 1)

     sample_count += all_branches // count
     sample_count += 1  # due to alignment
     return sample_count


 def _CountRecordedBranches(perf_data_filename, dso_name, branch_addresses):
     """Tally the branches that perf report lists for perf_data_filename.

     Two numbers are accumulated: the sample count of every reported branch,
     and the sample count of the one branch identified by branch_addresses.

     @param perf_data_filename: perf data filename
     @param dso_name: dso that the branch specified by branch_addresses
         pertains to.
     @param branch_addresses: pair of (source, target) addresses specifying the
         branch within dso_name to count.
     @returns: pair with the total branches recorded, and the count for
         the specified branch.
     """
     dso_map = Mmap.GetFromPerfData(perf_data_filename, dso_name)
     report_cmd = ('perf', 'report', '-i', perf_data_filename, '-nv',
                   '-s', 'dso_from,symbol_from,dso_to,symbol_to')
     report = subprocess.check_output(report_cmd, stderr=DEVNULL)

     total = 0
     matched = 0
     for row in report.splitlines():
         # Blank lines and '#' header lines carry no branch record.
         if not row:
             continue
         if row.startswith('#'):
             continue
         # Whitespace-separated columns: [1] is the sample count, [2]/[3] the
         # source dso and hex address, [7]/[8] the target dso and hex address.
         fields = row.split()
         count = int(fields[1])
         src_dso = fields[2]
         src_raw = int(fields[3], 16)
         dst_dso = fields[7]
         dst_raw = int(fields[8], 16)

         # The total includes non-loop branches.
         total += count

         if src_dso == dso_name and dst_dso == dso_name:
             # Translate mapped addresses back to dso addresses to compare.
             edge = (dso_map.Map(src_raw), dso_map.Map(dst_raw))
             if edge == branch_addresses:
                 matched += count  # should only match once.

     return total, matched


 def GatherPerfBranchSamples(noploop, branch_addresses, events, count,
                             progress_func=lambda i, j: None):
     """Run perf record -b with the given events, and noploop program.

     noploop is run 50 times: 5 repetitions for each of 10 loop counts, from
     10 million to 100 million in steps of 10 million. Each run is recorded
     into its own perf data file (a relative filename), which is deleted
     again once the run has been analyzed.

     Expects to record the branch specified by branch_addresses.

     @param noploop: Path to noploop binary. It should take one argument (number
         of loop iterations) and produce no output.
     @param branch_addresses: pair of branch (source, target) addresses.
     @param events: Comma-separated value to pass to '-e' arg of perf record,
         which determines when the branch buffer is sampled. ':u' will be
         appended to each event in order to sample only userspace branches.
         Some events may be translated to raw event codes if necessary.
     @param count: Event period to sample.
     @param progress_func: Callable taking (i, j). Called before each run with
         the loop-count index i (0-9) and repetition index j (0-4), and once
         with (-1, -1) after the last run. Defaults to doing nothing.
     @returns: List of dicts containing facts about the executions of noploop.
         Each dict has the keys 'loops' and 'branch_count'.
     @raises TestFail: if the number of samples seen is neither the estimate
         nor the estimate minus one, or if no record matches branch_addresses.
     @raises subprocess.CalledProcessError: if a perf command exits non-zero.
     """
     events = TranslateEvents(events.split(','))
     events = ','.join(e + ':u' for e in events)
     facts = []
     # range() instead of xrange(): the sequences are tiny and range exists on
     # every Python version.
     for i, j in itertools.product(range(10), range(5)):
         progress_func(i, j)
         loops = (i + 1) * 10000000  # (i+1) * 10 million
         fact = {'loops': loops}
         perf_data = 'perf.lbr.noploop.%d.%d.data' % (loops, j)
         with CleanupFile(perf_data):
             subprocess.check_call(
                     ('perf', 'record', '-o', perf_data,
                      '-b', '-e', events, '-c', '%d' % count,
                      noploop, '%d' % loops),
                     stderr=DEVNULL)
             noploop_dso_name = os.path.abspath(noploop)
             total_sampled_branches, branch_samples = _CountRecordedBranches(
                     perf_data, noploop_dso_name, branch_addresses)
             fact['branch_count'] = branch_samples

             # Explicit floor division: every sample contributes
             # BRANCH_BUFFER_LENGTH branches, and a partial remainder must not
             # turn the sample count into a fraction.
             total_samples = total_sampled_branches // BRANCH_BUFFER_LENGTH
             total_expected_samples = EstimateExpectedSamples(loops, count)
             if total_samples not in (total_expected_samples,
                                      total_expected_samples - 1):  # alignment
                 raise TestFail('Saw the wrong number of samples: '
                                'saw %d, expected %d or %d' %
                                (total_samples,
                                 total_expected_samples,
                                 total_expected_samples - 1))

             if fact['branch_count'] == 0:
                 raise TestFail('No matching branch records found.')
             facts.append(fact)
     progress_func(-1, -1)  # Finished
     return facts


 def ReadBranchAddressesFile(filename):
     """Load the hexadecimal addresses stored in filename.

     @param filename: Text file containing whitespace-separated hex numbers.
     @returns: Tuple of the numbers as integers, in file order.
     """
     with open(filename, 'r') as address_file:
         tokens = address_file.read().split()
     return tuple([int(token, 16) for token in tokens])


 def main():
     """Verify the operation of LBR using a simple noploop program and perf."""
     def _Progress(i, j):
         if i == -1 and j == -1:  # Finished
             print
             return
         if j == 0:
             if i != 0:
                 print
             print i, ':',
         print j,
         sys.stdout.flush()
     branch = ReadBranchAddressesFile('src/noploop_branch.txt')
     facts = GatherPerfBranchSamples('src/noploop', branch,
                                     'br_inst_retired.all_branches',
                                     10000,
                                     progress_func=_Progress)
     dt = numpy.dtype([('loops', numpy.int), ('branch_count', numpy.int)])
     a = stats_utils.FactsToNumpyArray(facts, dt)
     (slope, intercept), r2 = stats_utils.LinearRegression(
         a['loops'], a['branch_count'])
     for f in facts:
         print f
     print "slope:", slope
     print "intercept:", intercept
     print "r-squared:", r2

 # Run the verification when this file is executed as a script.
 if __name__ == '__main__':
     main()
	#!/usr/bin/python2.7
	# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import contextlib
	import itertools
	import os
	import os.path
	import re
	import subprocess

	import numpy

	import stats_utils


	class Error(Exception):
	    """Base class for the exceptions raised by this module."""

	class TestFail(Error):
	    """Raised when a test condition failed (as opposed to a tool failure)."""


	@contextlib.contextmanager
	def CleanupFile(path):
	    """Context manager that deletes path on exit.

	    The file is removed whether the body finishes normally or raises. A path
	    that does not exist on exit (for example because the command that was
	    supposed to create it failed) is ignored, so the cleanup never replaces
	    the body's own exception with a "no such file" error.

	    @param path: Name of the file to delete when the with-block is left.
	    @raises OSError: if removing the file fails for any reason other than
	        the file not existing.
	    """
	    import errno  # Local import: only this function needs it.

	    try:
	        yield
	    finally:
	        try:
	            os.remove(path)
	        except OSError as e:
	            # Nothing to clean up if the file was never created.
	            if e.errno != errno.ENOENT:
	                raise


	# Write-only handle on /dev/null, opened once when the module is loaded. It is
	# passed as stderr to the perf subprocesses in this file to discard their
	# diagnostics.
	DEVNULL = open('/dev/null', 'w')


	class Mmap(object):
	    """Represents a memory map, and does the (un)mapping arithmetic.

	    The three attributes are the integers of a "[start(length) @ pgoff]"
	    entry of a PERF_RECORD_MMAP record:
	        start: address at which the mapping begins.
	        length: size of the mapping.
	        pgoff: offset associated with the mapping, used together with start
	            to translate between mapped and dso addresses.
	    """

	    def __init__(self, start, length, pgoff):
	        self.start = start
	        self.length = length
	        self.pgoff = pgoff

	    def __repr__(self):
	        # Same "[start(length) @ pgoff]" layout as the raw dump, in hex.
	        return '[%x(%x) @ %x]' % (self.start, self.length, self.pgoff)

	    def Map(self, ip):
	        """Turns ip from a virtual mapped address back to a dso address.

	        (Frankly I think these are named backwards. This follows the naming
	        convention of perf's struct map.)

	        @param ip: virtual mapped address (integer).
	        @returns: the corresponding dso address (integer).
	        """
	        # See perf's util/map.h: map__map_ip()
	        return (ip + self.pgoff) - self.start

	    def Unmap(self, rip):
	        """Turns ip from a dso address into a virtual mapped address.

	        @param rip: dso address (integer).
	        @returns: the corresponding virtual mapped address (integer).
	        """
	        # See perf's util/map.h: map__unmap_ip()
	        return self.start + (rip - self.pgoff)

	    # Matches one PERF_RECORD_MMAP line of `perf report -D` output. Every
	    # numeric field is either a 0x-prefixed hex number or a bare 0; the '|'
	    # must be a plain alternation, not an escaped literal pipe.
	    MMAP_LINE_RE = re.compile(
	            r'(?P<event_ts>\d+) '
	            r'(?P<event_offset>0x[0-9a-fA-F]+|0) '
	            r'[[](?P<event_size>0x[0-9a-fA-F]+|0)[]]: '
	            r'PERF_RECORD_MMAP '
	            r'(?P<pid>-?\d+)/(?P<tid>-?\d+): '
	            r'[[]'
	                r'(?P<start>0x[0-9a-fA-F]+|0)'
	                r'[(](?P<length>0x[0-9a-fA-F]+|0)[)] @ '
	                r'(?P<pgoff>0x[0-9a-fA-F]+|0)'
	            r'[]]: '
	            r'((?P<executable>[rx]) )?'
	            r'(?P<filename>.*)')

	    @staticmethod
	    def GetFromPerfData(perf_data_filename, mmap_filename):
	        """Parse perf_data_filename and find how mmap_filename was mapped.

	        Runs `perf report -D` on the data file and scans its output for
	        PERF_RECORD_MMAP lines.

	        @param perf_data_filename: perf.data filename.
	        @param mmap_filename: Look for this mmap.
	        @returns: Mmap object representing the first map recorded for
	            mmap_filename, or None if there is no such record.
	        @raises Error: if a line mentioning PERF_RECORD_MMAP does not have
	            the expected format.
	        """
	        result = None
	        raw_trace_proc = subprocess.Popen(
	                ('perf', 'report', '-D', '-i', perf_data_filename),
	                stdout=subprocess.PIPE, stderr=DEVNULL)
	        try:
	            for line in raw_trace_proc.stdout:
	                if 'PERF_RECORD_MMAP' not in line:
	                    continue
	                match = Mmap.MMAP_LINE_RE.match(line)
	                if not match:
	                    raise Error(
	                            'Unexpected format for MMAP record in raw dump:\n' +
	                            line)
	                if match.group('filename') == mmap_filename:
	                    args = match.group('start', 'length', 'pgoff')
	                    result = Mmap(*(int(x, 16) for x in args))
	                    break
	            for line in raw_trace_proc.stdout:
	                # Skip rest of output
	                pass
	        finally:
	            # Always release the pipe and reap the child, including when a
	            # malformed line makes us raise part-way through the output.
	            raw_trace_proc.stdout.close()
	            raw_trace_proc.wait()
	        return result

	# Maps symbolic event names to the raw event codes that TranslateEvents
	# substitutes for them.
	RAW_EVENT_CODES = {
	'br_inst_retired.all_branches': 'r4c4',
	}

	def TranslateEvents(events):
	    """Replace event names that have a raw event code with that code.

	    @param events: Iterable of event name strings.
	    @returns: New list, in the same order, in which every name found in
	        RAW_EVENT_CODES is replaced by its raw code and every other name is
	        kept unchanged.
	    """
	    return [RAW_EVENT_CODES.get(e, e) for e in events]


	# Number of branch records in the LBR branch buffer.
	# This is the right value for SandyBridge, IvyBridge and Haswell, at least.
	# See Intel manual vol. 3B, 17.4.8 LBR
	# TODO: Consider detecting whether 16 is the correct branch buffer length
	# based on the uarch. However, all uarchs we run on have a 16-long buffer.
	BRANCH_BUFFER_LENGTH = 16

	def EstimateExpectedSamples(loops, count):
	    """Calculate the number of SAMPLE events expected.

	    ie, expect estimate * BRANCH_BUFFER_LENGTH branches to be sampled.

	    Incorporates the "observer effect": includes branches caused by returning
	    from PMU interrupts.

	    Includes one extra sample due to alignment of samples in the series of
	    branches. This sample can be expected "most" of the time, but it is not
	    incorrect for it to be missing.

	    All arithmetic is integer (floor) division, written with // so that the
	    result does not depend on the Python version or on
	    `from __future__ import division`.

	    @param loops: the number of noploop branches executed (integer).
	    @param count: the event sampling period. ie, a sample should be collected
	        every count branches. Expected to be an integer greater than 1.
	    @returns: the expected number of samples, as an integer.
	    @raises ZeroDivisionError: if count is 1 (or 0).
	    """
	    sample_count = 1  # assume program prolog takes one sample

	    all_branches = loops
	    loop_samples = loops // (count - 1)
	    while loop_samples >= 1:
	        all_branches += loop_samples
	        # compounding branches caused by samples caused by samples caused ...
	        loop_samples = loop_samples // (count - 1)

	    sample_count += all_branches // count
	    sample_count += 1  # due to alignment
	    return sample_count


	def _CountRecordedBranches(perf_data_filename, dso_name, branch_addresses):
	    """Count the branches recorded in perf_data_filename using perf report.

	    Count the total number of branches recorded, and also the count recorded
	    at a specific branch.

	    @param perf_data_filename: perf data filename
	    @param dso_name: dso that the branch specified by branch_addresses
	        pertains to.
	    @param branch_addresses: pair of (source, target) addresses specifying the
	        branch within dso_name to count.
	    @returns: pair with the total branches recorded, and the count for
	        the specified branch.
	    """
	    mmap = Mmap.GetFromPerfData(perf_data_filename, dso_name)
	    out = subprocess.check_output(
	            ('perf', 'report', '-i', perf_data_filename, '-nv',
	             '-s', 'dso_from,symbol_from,dso_to,symbol_to'),
	            stderr=DEVNULL)
	    total_sampled_branches = 0
	    branch_samples = 0
	    for line in out.splitlines():
	        # Blank lines and '#' header lines carry no branch record.
	        if not line or line.startswith('#'):
	            continue
	        # Whitespace-separated columns: [1] is the sample count, [2]/[3] the
	        # source dso and hex address, [7]/[8] the target dso and hex address.
	        record = line.split()
	        samples = int(record[1])
	        dso_from = record[2]
	        raw_from_address = int(record[3], 16)
	        dso_to = record[7]
	        raw_to_address = int(record[8], 16)

	        # including non-loop branches
	        total_sampled_branches += samples

	        if not (dso_from == dso_to == dso_name):
	            continue
	        # Translate mapped addresses back to dso addresses to compare.
	        from_address = mmap.Map(raw_from_address)
	        to_address = mmap.Map(raw_to_address)
	        if (from_address, to_address) == branch_addresses:
	            branch_samples += samples  # should only match once.

	    return total_sampled_branches, branch_samples


	def GatherPerfBranchSamples(noploop, branch_addresses, events, count,
	                            progress_func=lambda i, j: None):
	    """Run perf record -b with the given events, and noploop program.

	    noploop is run 50 times: 5 repetitions for each of 10 loop counts, from
	    10 million to 100 million in steps of 10 million. Each run is recorded
	    into its own perf data file (a relative filename), which is deleted
	    again once the run has been analyzed.

	    Expects to record the branch specified by branch_addresses.

	    @param noploop: Path to noploop binary. It should take one argument (number
	        of loop iterations) and produce no output.
	    @param branch_addresses: pair of branch (source, target) addresses.
	    @param events: Comma-separated value to pass to '-e' arg of perf record,
	        which determines when the branch buffer is sampled. ':u' will be
	        appended to each event in order to sample only userspace branches.
	        Some events may be translated to raw event codes if necessary.
	    @param count: Event period to sample.
	    @param progress_func: Callable taking (i, j). Called before each run with
	        the loop-count index i (0-9) and repetition index j (0-4), and once
	        with (-1, -1) after the last run. Defaults to doing nothing.
	    @returns: List of dicts containing facts about the executions of noploop.
	        Each dict has the keys 'loops' and 'branch_count'.
	    @raises TestFail: if the number of samples seen is neither the estimate
	        nor the estimate minus one, or if no record matches branch_addresses.
	    @raises subprocess.CalledProcessError: if a perf command exits non-zero.
	    """
	    events = TranslateEvents(events.split(','))
	    events = ','.join(e + ':u' for e in events)
	    facts = []
	    # range() instead of xrange(): the sequences are tiny and range exists on
	    # every Python version.
	    for i, j in itertools.product(range(10), range(5)):
	        progress_func(i, j)
	        loops = (i + 1) * 10000000  # (i+1) * 10 million
	        fact = {'loops': loops}
	        perf_data = 'perf.lbr.noploop.%d.%d.data' % (loops, j)
	        with CleanupFile(perf_data):
	            subprocess.check_call(
	                    ('perf', 'record', '-o', perf_data,
	                     '-b', '-e', events, '-c', '%d' % count,
	                     noploop, '%d' % loops),
	                    stderr=DEVNULL)
	            noploop_dso_name = os.path.abspath(noploop)
	            total_sampled_branches, branch_samples = _CountRecordedBranches(
	                    perf_data, noploop_dso_name, branch_addresses)
	            fact['branch_count'] = branch_samples

	            # Explicit floor division: every sample contributes
	            # BRANCH_BUFFER_LENGTH branches, and a partial remainder must not
	            # turn the sample count into a fraction.
	            total_samples = total_sampled_branches // BRANCH_BUFFER_LENGTH
	            total_expected_samples = EstimateExpectedSamples(loops, count)
	            if total_samples not in (total_expected_samples,
	                                     total_expected_samples - 1):  # alignment
	                raise TestFail('Saw the wrong number of samples: '
	                               'saw %d, expected %d or %d' %
	                               (total_samples,
	                                total_expected_samples,
	                                total_expected_samples - 1))

	            if fact['branch_count'] == 0:
	                raise TestFail('No matching branch records found.')
	            facts.append(fact)
	    progress_func(-1, -1)  # Finished
	    return facts


	def ReadBranchAddressesFile(filename):
	    """Load the hexadecimal addresses stored in filename.

	    @param filename: Text file containing whitespace-separated hex numbers.
	    @returns: Tuple of the numbers as integers, in file order.
	    """
	    with open(filename, 'r') as f:
	        branch = tuple(int(x, 16) for x in f.read().split())
	    return branch


	def main():
	"""Verify the operation of LBR using a simple noploop program and perf."""
	def _Progress(i, j):
	if i == -1 and j == -1: # Finished
	print
	return
	if j == 0:
	if i != 0:
	print
	print i, ':',
	print j,
	sys.stdout.flush()
	branch = ReadBranchAddressesFile('src/noploop_branch.txt')
	facts = GatherPerfBranchSamples('src/noploop', branch,
	'br_inst_retired.all_branches',
	10000,
	progress_func=_Progress)
	dt = numpy.dtype([('loops', numpy.int), ('branch_count', numpy.int)])
	a = stats_utils.FactsToNumpyArray(facts, dt)
	(slope, intercept), r2 = stats_utils.LinearRegression(
	a['loops'], a['branch_count'])
	for f in facts:
	print f
	print "slope:", slope
	print "intercept:", intercept
	print "r-squared:", r2

	# Run the verification when this file is executed as a script.
	if __name__ == '__main__':
	    main()