merge
diff --git a/elftools/common/utils.py b/elftools/common/utils.py
index a36b3ec..5358072 100644
--- a/elftools/common/utils.py
+++ b/elftools/common/utils.py
@@ -41,3 +41,18 @@
if not cond:
raise exception_type(msg)
+
+from contextlib import contextmanager
+
+@contextmanager
+def preserve_stream_pos(stream):
+    """ Context manager that restores the position of a stream on exit.
+
+        The position (stream.tell()) is recorded on entry and the stream is
+        seeked back to it when the with-block ends, including when the
+        block is left because of an exception.
+
+        Usage:
+
+        # stream has some position FOO (return value of stream.tell())
+        with preserve_stream_pos(stream):
+            # do stuff that manipulates the stream
+        # stream still has position FOO
+    """
+    saved_pos = stream.tell()
+    try:
+        yield
+    finally:
+        # Restore the position even if the body raised, so a failed read
+        # doesn't leave the stream at an arbitrary offset.
+        stream.seek(saved_pos)
diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py
index 8852fac..ec04f7d 100644
--- a/elftools/dwarf/compileunit.py
+++ b/elftools/dwarf/compileunit.py
@@ -10,6 +10,23 @@
class CompileUnit(object):
+ """ A DWARF compilation unit (CU).
+
+ A normal compilation unit typically represents the text and data
+ contributed to an executable by a single relocatable object file.
+ It may be derived from several source files,
+ including pre-processed "include files".
+
+ Serves as a container and context to DIEs that describe objects and code
+ belonging to a compilation unit.
+
+ CU header entries can be accessed as dict keys from this object, i.e.
+ cu = CompileUnit(...)
+ cu['version'] # version field of the CU header
+
+ To get the top-level DIE describing the compilation unit, call the
+ get_top_DIE method.
+ """
def __init__(self, header, dwarfinfo, structs, cu_die_offset):
""" header:
CU header for this compile unit
diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py
index 4a034fb..26e9dba 100644
--- a/elftools/dwarf/die.py
+++ b/elftools/dwarf/die.py
@@ -9,12 +9,27 @@
from collections import namedtuple
from ..common.ordereddict import OrderedDict
-from ..common.utils import struct_parse
+from ..common.utils import struct_parse, preserve_stream_pos
-# Describes an attribute value in the DIE: form and actual value
+# AttributeValue - describes an attribute value in the DIE:
#
-AttributeValue = namedtuple('AttributeValue', 'form value')
+# form:
+# The DW_FORM_* name of this attribute
+#
+# value:
+# The value parsed from the section and translated according to the form
+# (e.g. for a DW_FORM_strp it's the actual string taken from the string table)
+#
+# raw_value:
+# Raw value as parsed from the section - used for debugging and presentation
+# (e.g. for a DW_FORM_strp it's the raw string offset into the table)
+#
+# offset:
+# Offset of this attribute's value in the stream
+#
+AttributeValue = namedtuple(
+ 'AttributeValue', 'form value raw_value offset')
class DIE(object):
@@ -26,38 +41,100 @@
tag:
The DIE tag
- length:
+ size:
The size this DIE occupies in the section
attributes:
- An ordered dictionary mapping attribute names to values
+ An ordered dictionary mapping attribute names to values. It's
+ ordered to enable both efficient name->value mapping and
+ preserve the order of attributes in the section
+
+ has_children:
+ Specifies whether this DIE has children
"""
def __init__(self, cu, stream, offset):
""" cu:
CompileUnit object this DIE belongs to. Used to obtain context
information (structs, abbrev table, etc.)
-
+
stream, offset:
The stream and offset into it where this DIE's data is located
"""
self.cu = cu
+ self.dwarfinfo = self.cu.dwarfinfo # get DWARFInfo context
self.stream = stream
self.offset = offset
- self._parse_DIE()
+
+ self.attributes = OrderedDict()
+ self.tag = None
+ self.has_children = None
+ self.size = 0
+
+ self._parse_DIE()
+
+    def is_null(self):
+        """ Is this a null entry?
+
+            True when no tag was set for this DIE, which is the case when
+            its abbreviation code was parsed as 0.
+        """
+        return self.tag is None
+
+ #------ PRIVATE ------#
def _parse_DIE(self):
""" Parses the DIE info from the section, based on the abbreviation
table of the CU
"""
- saved_offset = self.offset
structs = self.cu.structs
- # The DIE begins with the abbreviation code. Read it and use it to
- # obtain the abbrev declaration for this DIE
+ # A DIE begins with the abbreviation code. Read it and use it to
+ # obtain the abbrev declaration for this DIE.
+ # Note: here and elsewhere, preserve_stream_pos is used on operations
+ # that manipulate the stream by reading data from it.
#
- abbrev_code = struct_parse(structs.Dwarf_uleb128(''), self.stream)
- abbrev = self.cu.get_abbrev_table().get_abbrev(abbrev_code)
+ abbrev_code = struct_parse(
+ structs.Dwarf_uleb128(''), self.stream, self.offset)
- print abbrev_code, abbrev, abbrev.decl
+ # This may be a null entry
+ if abbrev_code == 0:
+ self.size = self.stream.tell() - self.offset
+ return
+
+ with preserve_stream_pos(self.stream):
+ abbrev_decl = self.cu.get_abbrev_table().get_abbrev(abbrev_code)
+ self.tag = abbrev_decl['tag']
+ self.has_children = abbrev_decl.has_children()
+
+ # Guided by the attributes listed in the abbreviation declaration, parse
+ # values from the stream.
+ #
+ for name, form in abbrev_decl.iter_attr_specs():
+ attr_offset = self.stream.tell()
+ raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)
+ value = self._translate_attr_value(form, raw_value)
+ self.attributes[name] = AttributeValue(
+ form=form,
+ value=value,
+ raw_value=raw_value,
+ offset=attr_offset)
+
+ self.size = self.stream.tell() - self.offset
-
+    def _translate_attr_value(self, form, raw_value):
+        """ Translate a raw attr value according to the form.
+
+            form:
+                The form of the attribute (e.g. 'DW_FORM_strp')
+
+            raw_value:
+                The value as parsed from the stream for that form
+
+            Returns the translated value: the string read from the string
+            table for DW_FORM_strp, a bool for DW_FORM_flag, the translated
+            value of the actual form for DW_FORM_indirect, and raw_value
+            itself for any other form.
+        """
+        if form == 'DW_FORM_strp':
+            # Fetching the string moves the stream, so keep our position
+            # for whoever goes on parsing after this attribute.
+            with preserve_stream_pos(self.stream):
+                value = self.dwarfinfo.get_string_from_table(raw_value)
+        elif form == 'DW_FORM_flag':
+            value = raw_value != 0
+        elif form == 'DW_FORM_indirect':
+            # The raw value designates the actual form; the data for that
+            # form is read next from the stream.
+            # NOTE(review): raw_value is used directly as a key of
+            # Dwarf_dw_form, which is keyed by DW_FORM_* names - confirm the
+            # parsed value is a name and not a numeric form code.
+            form = raw_value
+            raw_value = struct_parse(
+                self.cu.structs.Dwarf_dw_form[form], self.stream)
+            # Let's hope this doesn't get too deep :-)
+            return self._translate_attr_value(form, raw_value)
+        else:
+            value = raw_value
+        return value
+
+
\ No newline at end of file
diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py
index 343b576..7a4bbc0 100644
--- a/elftools/dwarf/dwarfinfo.py
+++ b/elftools/dwarf/dwarfinfo.py
@@ -8,6 +8,7 @@
#-------------------------------------------------------------------------------
from collections import namedtuple
+from ..construct import CString
from ..common.exceptions import DWARFError
from ..common.utils import struct_parse, dwarf_assert
from .structs import DWARFStructs
@@ -48,12 +49,21 @@
self.debug_line_loc = debug_line_loc
self.little_endian = little_endian
- self.dwarf_format = 32
+
+ # This is the DWARFStructs the context uses, so it doesn't depend on
+ # DWARF format and address_size (these are determined per CU) - set them
+ # to default values.
self.structs = DWARFStructs(
little_endian=self.little_endian,
- dwarf_format=self.dwarf_format)
+ dwarf_format=32,
+ address_size=4)
- # Populate the list with CUs found in debug_info
+ # Populate the list with CUs found in debug_info. For each CU only its
+ # header is parsed immediately (the abbrev table isn't loaded until
+ # it's referenced by one of the CU's DIEs).
+ # Since there usually aren't many CUs in a single object, this
+ # shouldn't present a performance problem.
+ #
self._CU = self._parse_CUs()
# Cache for abbrev tables: a dict keyed by offset
@@ -107,6 +117,17 @@
"Offset '0x%x' to debug_info out of section bounds" % offset)
return offset + self.debug_info_loc.offset
+    def get_string_from_table(self, offset):
+        """ Read the string found at the given offset of the string table
+            section. The offset is relative to the start of that section.
+        """
+        absolute_pos = self.debug_str_loc.offset + offset
+        return struct_parse(CString(''), self.stream, stream_pos=absolute_pos)
+
+ #------ PRIVATE ------#
+
def _parse_CUs(self):
""" Parse CU entries from debug_info.
"""
@@ -124,14 +145,27 @@
#
initial_length = struct_parse(
self.structs.Dwarf_uint32(''), self.stream, offset)
- if initial_length == 0xFFFFFFFF:
- self.dwarf_format = 64
+ dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32
+
+ # At this point we still haven't read the whole header, so we don't
+ # know the address_size. Therefore, we're going to create structs
+ # with a default address_size=4. If, after parsing the header, we
+ # find out address_size is actually 8, we just create a new structs
+ # object for this CU.
+ #
cu_structs = DWARFStructs(
little_endian=self.little_endian,
- dwarf_format=self.dwarf_format)
+ dwarf_format=dwarf_format,
+ address_size=4)
cu_header = struct_parse(
cu_structs.Dwarf_CU_header, self.stream, offset)
+ if cu_header['address_size'] == 8:
+ cu_structs = DWARFStructs(
+ little_endian=self.little_endian,
+ dwarf_format=dwarf_format,
+ address_size=8)
+
cu_die_offset = self.stream.tell()
dwarf_assert(
self._is_supported_version(cu_header['version']),
diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py
index 536132b..0f3094c 100644
--- a/elftools/dwarf/structs.py
+++ b/elftools/dwarf/structs.py
@@ -8,8 +8,7 @@
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..construct import (
- UBInt8, UBInt16, UBInt32, UBInt64,
- ULInt8, ULInt16, ULInt32, ULInt64,
+ UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum,
PrefixedArray, CString,
)
@@ -19,7 +18,8 @@
class DWARFStructs(object):
""" Exposes Construct structs suitable for parsing information from DWARF
- sections. Configurable with endianity and format (32 or 64-bit)
+ sections. Each compile unit in DWARF info can have its own structs
+ object.
Accessible attributes (mostly described by in chapter 7 of the DWARF
spec v3):
@@ -30,6 +30,9 @@
Dwarf_offset:
32-bit or 64-bit word, depending on dwarf_format
+ Dwarf_target_addr:
+ 32-bit or 64-bit word, depending on address size
+
Dwarf_initial_length:
"Initial length field" encoding
section 7.4
@@ -51,10 +54,22 @@
See also the documentation of public methods.
"""
- def __init__(self, little_endian=True, dwarf_format=32):
+ def __init__(self, little_endian, dwarf_format, address_size):
+ """ little_endian:
+ True if the file is little endian, False if big
+
+ dwarf_format:
+ DWARF Format: 32 or 64-bit (see spec section 7.4)
+
+ address_size:
+ Target machine address size, in bytes (4 or 8). (See spec
+ section 7.5.1)
+ """
assert dwarf_format == 32 or dwarf_format == 64
+ assert address_size == 8 or address_size == 4
self.little_endian = little_endian
- self.dwarf_format = dwarf_format
+ self.dwarf_format = dwarf_format
+ self.address_size = address_size
self._create_structs()
def initial_lenght_field_size(self):
@@ -69,12 +84,16 @@
self.Dwarf_uint32 = ULInt32
self.Dwarf_uint64 = ULInt64
self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
+ self.Dwarf_target_addr = (
+ ULInt32 if self.address_size == 4 else ULInt64)
else:
self.Dwarf_uint8 = UBInt8
self.Dwarf_uint16 = UBInt16
self.Dwarf_uint32 = UBInt32
self.Dwarf_uint64 = UBInt64
self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
+ self.Dwarf_target_addr = (
+ UBInt32 if self.address_size == 4 else UBInt64)
self._create_initial_length()
self._create_leb128()
@@ -120,7 +139,7 @@
def _create_dw_form(self):
self.Dwarf_dw_form = dict(
- DW_FORM_addr=self.Dwarf_offset(''),
+ DW_FORM_addr=self.Dwarf_target_addr(''),
DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
diff --git a/z.py b/z.py
index 013a7c7..5e5e775 100644
--- a/z.py
+++ b/z.py
@@ -20,9 +20,15 @@
print efile.has_dwarf_info()
dwarfinfo = efile.get_dwarf_info()
+tt = dwarfinfo.structs.Dwarf_dw_form['DW_FORM_block1'].parse('\x03\x12\x34\x46')
-cu = dwarfinfo.get_CU(1)
-print cu.get_top_DIE()
+cu = dwarfinfo.get_CU(0)
+print 'CU header', cu.header
+topdie = cu.get_top_DIE()
+
+print topdie.size, topdie.tag
+for attrname, val in topdie.attributes.iteritems():
+ print attrname, val
#~ print dwarfinfo.structs.Dwarf_abbrev_entry.parse('\x13\x01\x01\x03\x50\x04\x00\x00')