more preparations for full DIE parsing:
- DWARFStructs got a new Dwarf_target_addr field (selected by a new address_size argument) that reflects the size of address fields in a CU
- DWARFInfo now gives access to the string table (get_string_from_table)
- fixed stream parsing bugs that happened because the stream position was not preserved when parsing code called other APIs that also move the stream
diff --git a/elftools/common/utils.py b/elftools/common/utils.py
index a36b3ec..5358072 100644
--- a/elftools/common/utils.py
+++ b/elftools/common/utils.py
@@ -41,3 +41,23 @@
if not cond:
raise exception_type(msg)
+
+from contextlib import contextmanager
+
+@contextmanager
+def preserve_stream_pos(stream):
+    """ Context manager that restores the stream position on exit, even
+        when the managed block raises. Usage:
+
+            # stream has some position FOO (return value of stream.tell())
+            with preserve_stream_pos(stream):
+                # do stuff that manipulates the stream
+            # stream still has position FOO
+    """
+    saved_pos = stream.tell()
+    try:
+        yield
+    finally:
+        # Runs on normal exit and on exceptions alike, so a failed parse
+        # cannot leave the shared stream at an arbitrary offset.
+        stream.seek(saved_pos)
diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py
index 4a034fb..fa1e5b8 100644
--- a/elftools/dwarf/die.py
+++ b/elftools/dwarf/die.py
@@ -9,12 +9,23 @@
from collections import namedtuple
from ..common.ordereddict import OrderedDict
-from ..common.utils import struct_parse
+from ..common.utils import struct_parse, preserve_stream_pos
-# Describes an attribute value in the DIE: form and actual value
+# Describes an attribute value in the DIE:
#
-AttributeValue = namedtuple('AttributeValue', 'form value')
+# form:
+# The DW_FORM_* name of this attribute
+#
+# value:
+# The value parsed from the section and translated according to the form
+# (e.g. for a DW_FORM_strp it's the actual string taken from the string table)
+#
+# raw_value:
+# Raw value as parsed from the section - used for debugging and presentation
+# (e.g. for a DW_FORM_strp it's the raw string offset into the table)
+#
+AttributeValue = namedtuple('AttributeValue', 'form value raw_value')
class DIE(object):
@@ -30,34 +41,51 @@
The size this DIE occupies in the section
attributes:
- An ordered dictionary mapping attribute names to values
+ An ordered dictionary mapping attribute names to values. It's
+ ordered to both enable efficient name->value mapping and
+ preserve the order of attributes in the section
"""
def __init__(self, cu, stream, offset):
""" cu:
CompileUnit object this DIE belongs to. Used to obtain context
information (structs, abbrev table, etc.)
-
+
stream, offset:
The stream and offset into it where this DIE's data is located
"""
self.cu = cu
+ self.dwarfinfo = self.cu.dwarfinfo # get DWARFInfo context
self.stream = stream
self.offset = offset
+ self.attributes = OrderedDict()
self._parse_DIE()
def _parse_DIE(self):
""" Parses the DIE info from the section, based on the abbreviation
table of the CU
"""
- saved_offset = self.offset
+ print self.offset, self.cu.structs.dwarf_format
structs = self.cu.structs
- # The DIE begins with the abbreviation code. Read it and use it to
- # obtain the abbrev declaration for this DIE
+ # A DIE begins with the abbreviation code. Read it and use it to
+ # obtain the abbrev declaration for this DIE.
+ # Note: here and elsewhere, preserve_stream_pos is used on operations
+ # that manipulate the stream by reading data from it.
#
- abbrev_code = struct_parse(structs.Dwarf_uleb128(''), self.stream)
- abbrev = self.cu.get_abbrev_table().get_abbrev(abbrev_code)
+ abbrev_code = struct_parse(
+ structs.Dwarf_uleb128(''), self.stream, self.offset)
+ with preserve_stream_pos(self.stream):
+ abbrev = self.cu.get_abbrev_table().get_abbrev(abbrev_code)
- print abbrev_code, abbrev, abbrev.decl
+ print '**', abbrev_code, abbrev, abbrev.decl
+
+ # Guided by the attributes listed in the abbreviation declaration, parse
+ # values from the stream.
+ #
+ for name, form in abbrev.iter_attr_specs():
+ print '** parsing at stream + ', self.stream.tell()
+ raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)
+ print '**', name, form, raw_value
+ #~ print structs.Dwarf_dw_form[form]
diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py
index 343b576..4301643 100644
--- a/elftools/dwarf/dwarfinfo.py
+++ b/elftools/dwarf/dwarfinfo.py
@@ -8,6 +8,7 @@
#-------------------------------------------------------------------------------
from collections import namedtuple
+from ..construct import CString
from ..common.exceptions import DWARFError
from ..common.utils import struct_parse, dwarf_assert
from .structs import DWARFStructs
@@ -48,10 +49,14 @@
self.debug_line_loc = debug_line_loc
self.little_endian = little_endian
- self.dwarf_format = 32
+
+ # This is the DWARFStructs the context uses, so it doesn't depend on
+ # DWARF format and address_size (these are determined per CU) - so we
+ # set them to default values.
self.structs = DWARFStructs(
little_endian=self.little_endian,
- dwarf_format=self.dwarf_format)
+ dwarf_format=32,
+ address_size=4)
# Populate the list with CUs found in debug_info
self._CU = self._parse_CUs()
@@ -107,6 +112,15 @@
"Offset '0x%x' to debug_info out of section bounds" % offset)
return offset + self.debug_info_loc.offset
+ def get_string_from_table(self, offset):
+ """ Obtain a string from the string table section, given an offset
+ relative to the section.
+ """
+ return struct_parse(
+ CString(''),
+ self.stream,
+ stream_pos=self.debug_str_loc.offset + offset)
+
def _parse_CUs(self):
""" Parse CU entries from debug_info.
"""
@@ -124,14 +138,27 @@
#
initial_length = struct_parse(
self.structs.Dwarf_uint32(''), self.stream, offset)
- if initial_length == 0xFFFFFFFF:
- self.dwarf_format = 64
+ dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32
+
+ # At this point we still haven't read the whole header, so we don't
+ # know the address_size. Therefore, we're going to create structs
+ # with a default address_size=4. If, after parsing the header, we
+ # find out address_size is actually 8, we just create a new structs
+ # object for this CU.
+ #
cu_structs = DWARFStructs(
little_endian=self.little_endian,
- dwarf_format=self.dwarf_format)
+ dwarf_format=dwarf_format,
+ address_size=4)
cu_header = struct_parse(
cu_structs.Dwarf_CU_header, self.stream, offset)
+ if cu_header['address_size'] == 8:
+ cu_structs = DWARFStructs(
+ little_endian=self.little_endian,
+ dwarf_format=dwarf_format,
+ address_size=8)
+
cu_die_offset = self.stream.tell()
dwarf_assert(
self._is_supported_version(cu_header['version']),
diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py
index 536132b..0f3094c 100644
--- a/elftools/dwarf/structs.py
+++ b/elftools/dwarf/structs.py
@@ -8,8 +8,7 @@
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..construct import (
- UBInt8, UBInt16, UBInt32, UBInt64,
- ULInt8, ULInt16, ULInt32, ULInt64,
+ UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum,
PrefixedArray, CString,
)
@@ -19,7 +18,8 @@
class DWARFStructs(object):
""" Exposes Construct structs suitable for parsing information from DWARF
- sections. Configurable with endianity and format (32 or 64-bit)
+ sections. Each compile unit in DWARF info can have its own structs
+ object.
Accessible attributes (mostly described by in chapter 7 of the DWARF
spec v3):
@@ -30,6 +30,9 @@
Dwarf_offset:
32-bit or 64-bit word, depending on dwarf_format
+ Dwarf_target_addr:
+ 32-bit or 64-bit word, depending on address size
+
Dwarf_initial_length:
"Initial length field" encoding
section 7.4
@@ -51,10 +54,22 @@
See also the documentation of public methods.
"""
- def __init__(self, little_endian=True, dwarf_format=32):
+ def __init__(self, little_endian, dwarf_format, address_size):
+ """ little_endian:
+ True if the file is little endian, False if big
+
+ dwarf_format:
+ DWARF Format: 32 or 64-bit (see spec section 7.4)
+
+ address_size:
+ Target machine address size, in bytes (4 or 8). (See spec
+ section 7.5.1)
+ """
assert dwarf_format == 32 or dwarf_format == 64
+ assert address_size == 8 or address_size == 4
self.little_endian = little_endian
- self.dwarf_format = dwarf_format
+ self.dwarf_format = dwarf_format
+ self.address_size = address_size
self._create_structs()
def initial_lenght_field_size(self):
@@ -69,12 +84,16 @@
self.Dwarf_uint32 = ULInt32
self.Dwarf_uint64 = ULInt64
self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
+ self.Dwarf_target_addr = (
+ ULInt32 if self.address_size == 4 else ULInt64)
else:
self.Dwarf_uint8 = UBInt8
self.Dwarf_uint16 = UBInt16
self.Dwarf_uint32 = UBInt32
self.Dwarf_uint64 = UBInt64
self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
+ self.Dwarf_target_addr = (
+ UBInt32 if self.address_size == 4 else UBInt64)
self._create_initial_length()
self._create_leb128()
@@ -120,7 +139,7 @@
def _create_dw_form(self):
self.Dwarf_dw_form = dict(
- DW_FORM_addr=self.Dwarf_offset(''),
+ DW_FORM_addr=self.Dwarf_target_addr(''),
DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
diff --git a/z.py b/z.py
index 013a7c7..4f2d848 100644
--- a/z.py
+++ b/z.py
@@ -21,7 +21,10 @@
dwarfinfo = efile.get_dwarf_info()
+print dwarfinfo.get_string_from_table(126)
+
cu = dwarfinfo.get_CU(1)
+print 'CU header', cu.header
print cu.get_top_DIE()
#~ print dwarfinfo.structs.Dwarf_abbrev_entry.parse('\x13\x01\x01\x03\x50\x04\x00\x00')