#!/usr/bin/python
#
# Copyright 2011 Google Inc. All Rights Reserved.
"""Summarize hottest basic blocks found while doing a ChromeOS FDO build.
Here is an example execution:
summarize_hot_blocks.py
--data_dir=~/chromeos/chroot/var/cache/chromeos-chrome/ --cutoff=10000
--output_dir=/home/x/y
With the cutoff, it will ignore any basic blocks that have a count less
than what is specified (in this example 10000)
The script looks inside the directory (this is typically a directory where
the object files are generated) for files with *.profile and *.optimized
suffixes. To get these, the following flags were added to the compiler
invokation within vanilla_vs_fdo.py in the profile-use phase.
"-fdump-tree-optimized-blocks-lineno "
"-fdump-ipa-profile-blocks-lineno "
Here is an example of the *.profile and *.optimized files contents:
# BLOCK 7 freq:3901 count:60342, starting at line 92
# PRED: 6 [39.0%] count:60342 (true,exec)
[url_canon_internal.cc : 92:28] MEM[(const char * *)source_6(D) + 16B] = D.28080_17;
[url_canon_internal.cc : 93:41] MEM[(struct Component *)parsed_4(D) + 16B] = MEM[(const struct Component &)repl_1(D) + 80];
# SUCC: 8 [100.0%] count:60342 (fallthru,exec)
# BLOCK 8 freq:10000 count:154667, starting at line 321
# PRED: 7 [100.0%] count:60342 (fallthru,exec) 6 [61.0%] count:94325 (false,exec)
[url_canon_internal.cc : 321:51] # DEBUG D#10 => [googleurl/src/url_canon_internal.cc : 321] &parsed_4(D)->host
this script finds the blocks with highest count and shows the first line
of each block so that it is easy to identify the origin of the basic block.
"""
__author__ = "llozano@google.com (Luis Lozano)"

import optparse
import os
import re
import shutil
import sys
import tempfile

from utils import command_executer

# Given a line, check if it has a block count and return it.
# Returns -1 if there is no match.
def GetBlockCount(line):
  match_obj = re.match(r".*# BLOCK \d+ .*count:(\d+)", line)
  if match_obj:
    return int(match_obj.group(1))
  else:
    return -1
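# For example, given the sample header line from the docstring above:
#   GetBlockCount("# BLOCK 8 freq:10000 count:154667, starting at line 321")
# returns 154667, while any line without a "# BLOCK ... count:" header
# returns -1.
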
class Collector(object):
  def __init__(self, data_dir, cutoff, output_dir, tempdir):
    self._data_dir = data_dir
    self._cutoff = cutoff
    self._output_dir = output_dir
    self._tempdir = tempdir
    self._ce = command_executer.GetCommandExecuter()

  def CollectFileList(self, file_exp, list_file):
    # Write the paths of all files under data_dir matching file_exp,
    # one per line, to list_file inside the temporary directory.
    command = ("find %s -type f -name '%s' > %s" %
               (self._data_dir, file_exp,
                os.path.join(self._tempdir, list_file)))
    ret = self._ce.RunCommand(command)
    if ret:
      raise Exception("Failed: %s" % command)

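  # Scan an open dump file for "# BLOCK" headers whose count meets the
  # cutoff; for each one, record the header plus the first following line
  # that carries "[file : line:col]" information.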
  def SummarizeLines(self, data_file):
    sum_lines = []
    search_lno = False
    sum_line = ""
    sum_count = 0
    for line in data_file:
      count = GetBlockCount(line)
      if count != -1:
        if count >= self._cutoff:
          search_lno = True
          sum_line = line.strip()
          sum_count = count
      # Look for a line that starts with line number information.
      elif search_lno and re.match(r"^\s*\[.*: \d*:\d*]", line):
        search_lno = False
        sum_lines.append("%d:%s: %s %s" %
                         (sum_count, data_file.name, sum_line, line))
    return sum_lines

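  # Each summary line produced above has the form
  #   <count>:<dump file name>: <# BLOCK header> <first source line>
  # Keeping the count first is what lets the final merge in SummarizeList
  # sort numerically on the first ":"-separated field.
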
  # Look for blocks in the data file whose count is at or above the cutoff,
  # and generate a sorted summary file of the hottest blocks.
  def SummarizeFile(self, data_file, sum_file):
    with open(data_file, "r") as f:
      sum_lines = self.SummarizeLines(f)
    # Sort the list in place, in descending order of block count; this works
    # because each summary line still embeds the "# BLOCK ... count:" header
    # that GetBlockCount parses.
    sum_lines.sort(key=GetBlockCount, reverse=True)
    with open(sum_file, "w") as sf:
      sf.write("".join(sum_lines))
    print "Generated file summary: ", sum_file

  # Find the hottest blocks in the list of files, generate a sorted summary
  # for each file, and then do a sorted merge of all the summaries.
  def SummarizeList(self, list_file, summary_file):
    with open(os.path.join(self._tempdir, list_file)) as f:
      sort_list = []
      for file_name in f:
        file_name = file_name.strip()
        sum_file = "%s.sum" % file_name
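        # "sort --files0-from" expects NUL-terminated file names, so a
        # NUL byte (chr(0)) is appended after each summary file name.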
        sort_list.append("%s%s" % (sum_file, chr(0)))
        self.SummarizeFile(file_name, sum_file)
    tmp_list_file = os.path.join(self._tempdir, "file_list.dat")
    with open(tmp_list_file, "w") as file_list_file:
      for x in sort_list:
        file_list_file.write(x)
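    # Merge the per-file summaries (each already sorted by SummarizeFile)
    # into one file, sorted numerically and in reverse on the count field;
    # "sort --merge" assumes its input files are themselves sorted.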
    merge_command = ("sort -nr -t: -k1 --merge --files0-from=%s > %s" %
                     (tmp_list_file, summary_file))
    ret = self._ce.RunCommand(merge_command)
    if ret:
      raise Exception("Failed: %s" % merge_command)
    print "Generated general summary: ", summary_file

  def SummarizePreOptimized(self, summary_file):
    self.CollectFileList("*.profile", "chrome.profile.list")
    self.SummarizeList("chrome.profile.list",
                       os.path.join(self._output_dir, summary_file))

  def SummarizeOptimized(self, summary_file):
    self.CollectFileList("*.optimized", "chrome.optimized.list")
    self.SummarizeList("chrome.optimized.list",
                       os.path.join(self._output_dir, summary_file))

def Main(argv):
  command_executer.InitCommandExecuter()
  usage = ("usage: %prog --data_dir=<dir> --cutoff=<value> "
           "--output_dir=<dir> [--keep_tmp]")
  parser = optparse.OptionParser(usage=usage)
  parser.add_option("--data_dir",
                    dest="data_dir",
                    help=("directory where the FDO (*.profile and "
                          "*.optimized) files are located"))
  parser.add_option("--cutoff",
                    dest="cutoff",
                    help="minimum count to consider for each basic block")
  parser.add_option("--output_dir",
                    dest="output_dir",
                    help=("directory where summary data will be generated "
                          "(pre_optimized.txt, optimized.txt)"))
  parser.add_option("--keep_tmp",
                    action="store_true",
                    dest="keep_tmp",
                    default=False,
                    help=("keep directory with temporary files "
                          "(for debugging purposes)"))
  options = parser.parse_args(argv)[0]
  if not all((options.data_dir, options.cutoff, options.output_dir)):
    parser.print_help()
    sys.exit(1)
  tempdir = tempfile.mkdtemp()
  co = Collector(options.data_dir, int(options.cutoff), options.output_dir,
                 tempdir)
  co.SummarizePreOptimized("pre_optimized.txt")
  co.SummarizeOptimized("optimized.txt")
  if not options.keep_tmp:
    shutil.rmtree(tempdir, ignore_errors=True)
  return 0

if __name__ == "__main__":
  retval = Main(sys.argv)
  sys.exit(retval)