elftools/dwarf/namelut.py - mirrors/cros/chromiumos/third_party/pyelftools - Git at Google

 #-------------------------------------------------------------------------------
 # elftools: dwarf/namelut.py
 #
 # DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames)
 #
 # Vijay Ramasami (rvijayc@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
 import os
 import collections
 from collections import OrderedDict
 from ..common.utils import struct_parse
 from ..common.py3compat import Mapping
 from bisect import bisect_right
 import math
 from ..construct import CString, Struct, If

 # Value type stored in a NameLUT: the offset of the owning CU (cu_ofs) and the
 # offset of the DIE describing the symbol (die_ofs).
 NameLUTEntry = collections.namedtuple(
         typename='NameLUTEntry', field_names='cu_ofs die_ofs')

 class NameLUT(Mapping):
     """
     A "Name LUT" holds any of the tables specified by .debug_pubtypes or
     .debug_pubnames sections. This is basically a dictionary where the key is
     the symbol name (either a public variable, function or a type), and the
     value is the tuple (cu_offset, die_offset) corresponding to the variable.
     The die_offset is an absolute offset (meaning, it can be used to search the
     CU by iterating until a match is obtained).

     An ordered dictionary is used to preserve the CU order (i.e., items are
     stored on a per-CU basis, as they were originally in the .debug_*
     section).

     Note: indexing with an unknown name (pubnames["no_such_name"]) returns
     None instead of raising KeyError. Use the "in" operator to test whether
     a name is present.

     Usage:

     The NameLUT walks and talks like a dictionary and hence it can be used as
     such. Some examples below:

     # get the pubnames (a NameLUT from DWARF info).
     pubnames = dwarf_info.get_pubnames()

     # lookup a variable.
     entry1 = pubnames["var_name1"]
     entry2 = pubnames.get("var_name2", default=<default_var>)
     print(entry2.cu_ofs)
     ...

     # iterate over items.
     for (name, entry) in pubnames.items():
       # do stuff with name, entry.cu_ofs, entry.die_ofs

     # iterate over items on a per-CU basis.
     import itertools
     for cu_ofs, item_list in itertools.groupby(pubnames.items(),
         key = lambda x: x[1].cu_ofs):
       # items are now grouped by cu_ofs.
       # item_list is an iterator yielding (name, NameLUTEntry) pairs
       # belonging to cu_ofs.
       # We can parse the CU at cu_offset and use the parsed CU results
       # to parse the pubname DIEs in the CU listed by item_list.
       for item in item_list:
         # work with item which is part of the CU with cu_ofs.

     """

     def __init__(self, stream, size, structs):
         """
         stream:
             Stream holding the section contents. It is rewound to offset 0
             and read when the entries are first needed.

         size:
             Size of the section in bytes; parsing stops once this offset is
             reached.

         structs:
             Object providing Dwarf_offset, Dwarf_nameLUT_header and
             initial_length_field_size() used to decode the section.
         """
         self._stream = stream
         self._size = size
         self._structs = structs
         # entries are lazily loaded on demand.
         self._entries = None
         # CU headers (for readelf).
         self._cu_headers = None

     def _ensure_loaded(self):
         """
         Parse the section on first use so that self._entries is populated.
         Does nothing if the entries were already parsed or injected through
         set_entries.
         """
         if self._entries is None:
             self._entries, self._cu_headers = self._get_entries()

     def get_entries(self):
         """
         Returns the parsed NameLUT entries. The returned object is a dictionary
         with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as
         the value.

         This is useful when dealing with very large ELF files with millions of
         entries. The returned entries can be pickled to a file and restored by
         calling set_entries on subsequent loads.
         """
         self._ensure_loaded()
         return self._entries

     def set_entries(self, entries, cu_headers):
         """
         Set the NameLUT entries from an external source. The input is a
         dictionary with the symbol name as the key and NameLUTEntry(cu_ofs,
         die_ofs) as the value, plus the matching list of CU headers.

         This option is useful when dealing with very large ELF files with
         millions of entries. The entries can be parsed once and pickled to a
         file and can be restored via this function on subsequent loads.
         """
         self._entries = entries
         self._cu_headers = cu_headers

     def __len__(self):
         """
         Returns the number of entries in the NameLUT.
         """
         self._ensure_loaded()
         return len(self._entries)

     def __contains__(self, name):
         """
         Returns True if the given symbol name is present in the NameLUT.

         This must be defined explicitly: the default Mapping implementation
         relies on __getitem__ raising KeyError, but __getitem__ here returns
         None for unknown names, which would make every name look present.
         """
         self._ensure_loaded()
         return name in self._entries

     def __getitem__(self, name):
         """
         Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds
         to the given symbol name, or None if the name is unknown (no KeyError
         is raised; this behavior is kept for backward compatibility).
         """
         self._ensure_loaded()
         return self._entries.get(name)

     def __iter__(self):
         """
         Returns an iterator over the symbol names of the NameLUT dictionary.
         """
         self._ensure_loaded()
         return iter(self._entries)

     def items(self):
         """
         Returns the NameLUT dictionary items.
         """
         self._ensure_loaded()
         return self._entries.items()

     def get(self, name, default=None):
         """
         Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or
         the given default (None unless specified) if the symbol does not exist
         in the corresponding section.
         """
         self._ensure_loaded()
         return self._entries.get(name, default)

     def get_cu_headers(self):
         """
         Returns all CU headers. Mainly required for readelf.
         """
         # Keyed on the headers (not the entries) so that headers are parsed
         # even if entries were injected without headers.
         if self._cu_headers is None:
             self._entries, self._cu_headers = self._get_entries()

         return self._cu_headers

     def _get_entries(self):
         """
         Parse the (name, cu_ofs, die_ofs) information from this section.

         Returns a tuple (entries, cu_headers): entries is an OrderedDict
         mapping the symbol name to NameLUTEntry(cu_ofs, die_ofs), and
         cu_headers is the list of parsed per-CU table headers in section
         order.
         """

         self._stream.seek(0)
         entries = OrderedDict()
         cu_headers = []
         offset = 0
         # According to 6.1.1. of DWARFv4, each set of names is terminated by
         # an offset field containing zero (and no following string). Because
         # of sequential parsing, every next entry may be that terminator.
         # So, field "name" is conditional.
         entry_struct = Struct("Dwarf_offset_name_pair",
                 self._structs.Dwarf_offset('die_ofs'),
                 If(lambda ctx: ctx['die_ofs'], CString('name')))

         # each run of this loop will fetch one CU worth of entries.
         while offset < self._size:

             # read the header for this CU.
             namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header,
                     self._stream, offset)
             cu_headers.append(namelut_hdr)
             # compute the offset of the next CU table: unit_length does not
             # include the size of the initial length field itself.
             offset = (offset + namelut_hdr.unit_length +
                      self._structs.initial_length_field_size())

             # before inner loop, latch data that will be used in the inner
             # loop to avoid attribute access and other computation.
             hdr_cu_ofs = namelut_hdr.debug_info_offset

             # read entries sequentially until the terminating record.
             while True:
                 entry = struct_parse(entry_struct, self._stream)

                 # if it is zero, this is the terminating record.
                 if entry.die_ofs == 0:
                     break
                 # add this entry to the look-up dictionary; the stored
                 # die_ofs is the CU offset plus the CU-relative DIE offset.
                 entries[entry.name.decode('utf-8')] = NameLUTEntry(
                         cu_ofs = hdr_cu_ofs,
                         die_ofs = hdr_cu_ofs + entry.die_ofs)

         # return the entries parsed so far.
         return (entries, cu_headers)
	#-------------------------------------------------------------------------------
	# elftools: dwarf/namelut.py
	#
	# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames)
	#
	# Vijay Ramasami (rvijayc@gmail.com)
	# This code is in the public domain
	#-------------------------------------------------------------------------------
	import os
	import collections
	from collections import OrderedDict
	from ..common.utils import struct_parse
	from ..common.py3compat import Mapping
	from bisect import bisect_right
	import math
	from ..construct import CString, Struct, If

	# Value type stored in a NameLUT: the offset of the owning CU (cu_ofs) and the
	# offset of the DIE describing the symbol (die_ofs).
	NameLUTEntry = collections.namedtuple(
	        typename='NameLUTEntry', field_names='cu_ofs die_ofs')

	class NameLUT(Mapping):
	    """
	    A "Name LUT" holds any of the tables specified by .debug_pubtypes or
	    .debug_pubnames sections. This is basically a dictionary where the key is
	    the symbol name (either a public variable, function or a type), and the
	    value is the tuple (cu_offset, die_offset) corresponding to the variable.
	    The die_offset is an absolute offset (meaning, it can be used to search the
	    CU by iterating until a match is obtained).

	    An ordered dictionary is used to preserve the CU order (i.e., items are
	    stored on a per-CU basis, as they were originally in the .debug_*
	    section).

	    Note: indexing with an unknown name (pubnames["no_such_name"]) returns
	    None instead of raising KeyError. Use the "in" operator to test whether
	    a name is present.

	    Usage:

	    The NameLUT walks and talks like a dictionary and hence it can be used as
	    such. Some examples below:

	    # get the pubnames (a NameLUT from DWARF info).
	    pubnames = dwarf_info.get_pubnames()

	    # lookup a variable.
	    entry1 = pubnames["var_name1"]
	    entry2 = pubnames.get("var_name2", default=<default_var>)
	    print(entry2.cu_ofs)
	    ...

	    # iterate over items.
	    for (name, entry) in pubnames.items():
	      # do stuff with name, entry.cu_ofs, entry.die_ofs

	    # iterate over items on a per-CU basis.
	    import itertools
	    for cu_ofs, item_list in itertools.groupby(pubnames.items(),
	        key = lambda x: x[1].cu_ofs):
	      # items are now grouped by cu_ofs.
	      # item_list is an iterator yielding (name, NameLUTEntry) pairs
	      # belonging to cu_ofs.
	      # We can parse the CU at cu_offset and use the parsed CU results
	      # to parse the pubname DIEs in the CU listed by item_list.
	      for item in item_list:
	        # work with item which is part of the CU with cu_ofs.

	    """

	    def __init__(self, stream, size, structs):
	        """
	        stream:
	            Stream holding the section contents. It is rewound to offset 0
	            and read when the entries are first needed.

	        size:
	            Size of the section in bytes; parsing stops once this offset is
	            reached.

	        structs:
	            Object providing Dwarf_offset, Dwarf_nameLUT_header and
	            initial_length_field_size() used to decode the section.
	        """
	        self._stream = stream
	        self._size = size
	        self._structs = structs
	        # entries are lazily loaded on demand.
	        self._entries = None
	        # CU headers (for readelf).
	        self._cu_headers = None

	    def _ensure_loaded(self):
	        """
	        Parse the section on first use so that self._entries is populated.
	        Does nothing if the entries were already parsed or injected through
	        set_entries.
	        """
	        if self._entries is None:
	            self._entries, self._cu_headers = self._get_entries()

	    def get_entries(self):
	        """
	        Returns the parsed NameLUT entries. The returned object is a dictionary
	        with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as
	        the value.

	        This is useful when dealing with very large ELF files with millions of
	        entries. The returned entries can be pickled to a file and restored by
	        calling set_entries on subsequent loads.
	        """
	        self._ensure_loaded()
	        return self._entries

	    def set_entries(self, entries, cu_headers):
	        """
	        Set the NameLUT entries from an external source. The input is a
	        dictionary with the symbol name as the key and NameLUTEntry(cu_ofs,
	        die_ofs) as the value, plus the matching list of CU headers.

	        This option is useful when dealing with very large ELF files with
	        millions of entries. The entries can be parsed once and pickled to a
	        file and can be restored via this function on subsequent loads.
	        """
	        self._entries = entries
	        self._cu_headers = cu_headers

	    def __len__(self):
	        """
	        Returns the number of entries in the NameLUT.
	        """
	        self._ensure_loaded()
	        return len(self._entries)

	    def __contains__(self, name):
	        """
	        Returns True if the given symbol name is present in the NameLUT.

	        This must be defined explicitly: the default Mapping implementation
	        relies on __getitem__ raising KeyError, but __getitem__ here returns
	        None for unknown names, which would make every name look present.
	        """
	        self._ensure_loaded()
	        return name in self._entries

	    def __getitem__(self, name):
	        """
	        Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds
	        to the given symbol name, or None if the name is unknown (no KeyError
	        is raised; this behavior is kept for backward compatibility).
	        """
	        self._ensure_loaded()
	        return self._entries.get(name)

	    def __iter__(self):
	        """
	        Returns an iterator over the symbol names of the NameLUT dictionary.
	        """
	        self._ensure_loaded()
	        return iter(self._entries)

	    def items(self):
	        """
	        Returns the NameLUT dictionary items.
	        """
	        self._ensure_loaded()
	        return self._entries.items()

	    def get(self, name, default=None):
	        """
	        Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or
	        the given default (None unless specified) if the symbol does not exist
	        in the corresponding section.
	        """
	        self._ensure_loaded()
	        return self._entries.get(name, default)

	    def get_cu_headers(self):
	        """
	        Returns all CU headers. Mainly required for readelf.
	        """
	        # Keyed on the headers (not the entries) so that headers are parsed
	        # even if entries were injected without headers.
	        if self._cu_headers is None:
	            self._entries, self._cu_headers = self._get_entries()

	        return self._cu_headers

	    def _get_entries(self):
	        """
	        Parse the (name, cu_ofs, die_ofs) information from this section.

	        Returns a tuple (entries, cu_headers): entries is an OrderedDict
	        mapping the symbol name to NameLUTEntry(cu_ofs, die_ofs), and
	        cu_headers is the list of parsed per-CU table headers in section
	        order.
	        """

	        self._stream.seek(0)
	        entries = OrderedDict()
	        cu_headers = []
	        offset = 0
	        # According to 6.1.1. of DWARFv4, each set of names is terminated by
	        # an offset field containing zero (and no following string). Because
	        # of sequential parsing, every next entry may be that terminator.
	        # So, field "name" is conditional.
	        entry_struct = Struct("Dwarf_offset_name_pair",
	                self._structs.Dwarf_offset('die_ofs'),
	                If(lambda ctx: ctx['die_ofs'], CString('name')))

	        # each run of this loop will fetch one CU worth of entries.
	        while offset < self._size:

	            # read the header for this CU.
	            namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header,
	                    self._stream, offset)
	            cu_headers.append(namelut_hdr)
	            # compute the offset of the next CU table: unit_length does not
	            # include the size of the initial length field itself.
	            offset = (offset + namelut_hdr.unit_length +
	                     self._structs.initial_length_field_size())

	            # before inner loop, latch data that will be used in the inner
	            # loop to avoid attribute access and other computation.
	            hdr_cu_ofs = namelut_hdr.debug_info_offset

	            # read entries sequentially until the terminating record.
	            while True:
	                entry = struct_parse(entry_struct, self._stream)

	                # if it is zero, this is the terminating record.
	                if entry.die_ofs == 0:
	                    break
	                # add this entry to the look-up dictionary; the stored
	                # die_ofs is the CU offset plus the CU-relative DIE offset.
	                entries[entry.name.decode('utf-8')] = NameLUTEntry(
	                        cu_ofs = hdr_cu_ofs,
	                        die_ofs = hdr_cu_ofs + entry.die_ofs)

	        # return the entries parsed so far.
	        return (entries, cu_headers)