merge

commit: 0fb9a992d644fd4bcfd8b685285c327cab14ac73 [log] [tgz]
author: Eli Bendersky <eliben@gmail.com> Fri Sep 23 11:00:34 2011 +0300
committer: Eli Bendersky <eliben@gmail.com> Fri Sep 23 11:00:34 2011 +0300
tree: dae7a88f0204ae91db942f55eee5e311ec757c2e
parent: d87f1e5ffe135984172175239db34fc343adaf6c [diff]
parent: 89a824fd227b7e006dbbc18cf09006bdaeb350e4 [diff]
diff --git a/elftools/common/utils.py b/elftools/common/utils.py
index a36b3ec..5358072 100644
--- a/elftools/common/utils.py
+++ b/elftools/common/utils.py

@@ -41,3 +41,18 @@
     if not cond:
         raise exception_type(msg)
 
+
+from contextlib import contextmanager
+
+@contextmanager
+def preserve_stream_pos(stream):
+    """ Usage:
+            
+            # stream has some position FOO (return value of stream.tell())
+            with preserve_stream_pos(stream):
+                # do stuff that manipulates the stream
+            # stream still has position FOO
+    """
+    saved_pos = stream.tell()
+    yield
+    stream.seek(saved_pos)

diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py
index 8852fac..ec04f7d 100644
--- a/elftools/dwarf/compileunit.py
+++ b/elftools/dwarf/compileunit.py

@@ -10,6 +10,23 @@
 
 
 class CompileUnit(object):
+    """ A DWARF compilation unit (CU). 
+    
+            A normal compilation unit typically represents the text and data
+            contributed to an executable by a single relocatable object file.
+            It may be derived from several source files, 
+            including pre-processed "include files".
+            
+        Serves as a container and context to DIEs that describe objects and code
+        belonging to a compilation unit.
+        
+        CU header entries can be accessed as dict keys from this object, i.e.
+           cu = CompileUnit(...)
+           cu['version']  # version field of the CU header
+        
+        To get the top-level DIE describing the compilation unit, call the 
+        get_top_DIE method.
+    """
     def __init__(self, header, dwarfinfo, structs, cu_die_offset):
         """ header:
                 CU header for this compile unit

diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py
index 4a034fb..26e9dba 100644
--- a/elftools/dwarf/die.py
+++ b/elftools/dwarf/die.py

@@ -9,12 +9,27 @@
 from collections import namedtuple
 
 from ..common.ordereddict import OrderedDict
-from ..common.utils import struct_parse
+from ..common.utils import struct_parse, preserve_stream_pos
 
 
-# Describes an attribute value in the DIE: form and actual value
+# AttributeValue - describes an attribute value in the DIE: 
 #
-AttributeValue = namedtuple('AttributeValue', 'form value')
+# form: 
+#   The DW_FORM_* name of this attribute
+#
+# value:
+#   The value parsed from the section and translated according to the form
+#   (e.g. for a DW_FORM_strp it's the actual string taken from the string table)
+#
+# raw_value:
+#   Raw value as parsed from the section - used for debugging and presentation
+#   (e.g. for a DW_FORM_strp it's the raw string offset into the table)
+#
+# offset:
+#   Offset of this attribute's value in the stream
+#
+AttributeValue = namedtuple(
+    'AttributeValue', 'form value raw_value offset')
 
 
 class DIE(object):
@@ -26,38 +41,100 @@
             tag:
                 The DIE tag
         
-            length:
+            size:
                 The size this DIE occupies in the section
             
             attributes:
-                An ordered dictionary mapping attribute names to values
+                An ordered dictionary mapping attribute names to values. It's 
+                ordered to enable both efficient name->value mapping and
+                preserve the order of attributes in the section
+            
+            has_children:
+                Specifies whether this DIE has children
     """
     def __init__(self, cu, stream, offset):
         """ cu:
                 CompileUnit object this DIE belongs to. Used to obtain context
                 information (structs, abbrev table, etc.)
-            
+                        
             stream, offset:
                 The stream and offset into it where this DIE's data is located
         """
         self.cu = cu
+        self.dwarfinfo = self.cu.dwarfinfo # get DWARFInfo context
         self.stream = stream
         self.offset = offset
-        self._parse_DIE()
+        
+        self.attributes = OrderedDict()
+        self.tag = None
+        self.has_children = None
+        self.size = 0
+        
+        self._parse_DIE()   
+    
+    def is_null(self):
+        """ Is this a null entry?
+        """
+        return self.tag is None
+    
+    #------ PRIVATE ------#
     
     def _parse_DIE(self):
         """ Parses the DIE info from the section, based on the abbreviation
             table of the CU
         """
-        saved_offset = self.offset
         structs = self.cu.structs
         
-        # The DIE begins with the abbreviation code. Read it and use it to 
-        # obtain the abbrev declaration for this DIE
+        # A DIE begins with the abbreviation code. Read it and use it to 
+        # obtain the abbrev declaration for this DIE.
+        # Note: here and elsewhere, preserve_stream_pos is used on operations
+        # that manipulate the stream by reading data from it.
         #
-        abbrev_code = struct_parse(structs.Dwarf_uleb128(''), self.stream)
-        abbrev = self.cu.get_abbrev_table().get_abbrev(abbrev_code)
+        abbrev_code = struct_parse(
+            structs.Dwarf_uleb128(''), self.stream, self.offset)
         
-        print abbrev_code, abbrev, abbrev.decl
+        # This may be a null entry
+        if abbrev_code == 0:
+            self.size = self.stream.tell() - self.offset
+            return
+        
+        with preserve_stream_pos(self.stream):
+            abbrev_decl = self.cu.get_abbrev_table().get_abbrev(abbrev_code)
+        self.tag = abbrev_decl['tag']
+        self.has_children = abbrev_decl.has_children()
+        
+        # Guided by the attributes listed in the abbreviation declaration, parse
+        # values from the stream.
+        #
+        for name, form in abbrev_decl.iter_attr_specs():
+            attr_offset = self.stream.tell()
+            raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)
+            value = self._translate_attr_value(form, raw_value)            
+            self.attributes[name] = AttributeValue(
+                form=form,
+                value=value,
+                raw_value=raw_value,
+                offset=attr_offset)
+        
+        self.size = self.stream.tell() - self.offset
 
-
+    def _translate_attr_value(self, form, raw_value):
+        """ Translate a raw attr value according to the form
+        """
+        value = None
+        if form == 'DW_FORM_strp':
+            with preserve_stream_pos(self.stream):
+                value = self.dwarfinfo.get_string_from_table(raw_value)
+        elif form == 'DW_FORM_flag':
+            value = not raw_value == 0
+        elif form == 'DW_FORM_indirect':
+            form = raw_value
+            raw_value = struct_parse(
+                structs.Dwarf_dw_form[form], self.stream)
+            # Let's hope this doesn't get too deep :-)
+            return self._translate_attr_value(form, raw_value)
+        else:
+            value = raw_value
+        return value
+        
+         
\ No newline at end of file

diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py
index 343b576..7a4bbc0 100644
--- a/elftools/dwarf/dwarfinfo.py
+++ b/elftools/dwarf/dwarfinfo.py

@@ -8,6 +8,7 @@
 #-------------------------------------------------------------------------------
 from collections import namedtuple
 
+from ..construct import CString
 from ..common.exceptions import DWARFError
 from ..common.utils import struct_parse, dwarf_assert
 from .structs import DWARFStructs
@@ -48,12 +49,21 @@
         self.debug_line_loc = debug_line_loc
         
         self.little_endian = little_endian
-        self.dwarf_format = 32
+        
+        # This is the DWARFStructs the context uses, so it doesn't depend on 
+        # DWARF format and address_size (these are determined per CU) - set them
+        # to default values.
         self.structs = DWARFStructs(
             little_endian=self.little_endian,
-            dwarf_format=self.dwarf_format)
+            dwarf_format=32,
+            address_size=4)
         
-        # Populate the list with CUs found in debug_info
+        # Populate the list with CUs found in debug_info. For each CU only its
+        # header is parsed immediately (the abbrev table isn't loaded until
+        # it's referenced by one of the CU's DIEs).
+        # Since there usually aren't many CUs in a single object, this
+        # shouldn't present a performance problem.
+        #
         self._CU = self._parse_CUs()
         
         # Cache for abbrev tables: a dict keyed by offset
@@ -107,6 +117,17 @@
             "Offset '0x%x' to debug_info out of section bounds" % offset)
         return offset + self.debug_info_loc.offset
     
+    def get_string_from_table(self, offset):
+        """ Obtain a string from the string table section, given an offset 
+            relative to the section.
+        """
+        return struct_parse(
+            CString(''),
+            self.stream,
+            stream_pos=self.debug_str_loc.offset + offset)
+    
+    #------ PRIVATE ------#
+    
     def _parse_CUs(self):
         """ Parse CU entries from debug_info.
         """
@@ -124,14 +145,27 @@
             #
             initial_length = struct_parse(
                 self.structs.Dwarf_uint32(''), self.stream, offset)
-            if initial_length == 0xFFFFFFFF:
-                self.dwarf_format = 64
+            dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32
+            
+            # At this point we still haven't read the whole header, so we don't
+            # know the address_size. Therefore, we're going to create structs
+            # with a default address_size=4. If, after parsing the header, we
+            # find out address_size is actually 8, we just create a new structs
+            # object for this CU.
+            #
             cu_structs = DWARFStructs(
                 little_endian=self.little_endian,
-                dwarf_format=self.dwarf_format)
+                dwarf_format=dwarf_format,
+                address_size=4)
             
             cu_header = struct_parse(
                 cu_structs.Dwarf_CU_header, self.stream, offset)
+            if cu_header['address_size'] == 8:
+                cu_structs = DWARFStructs(
+                    little_endian=self.little_endian,
+                    dwarf_format=dwarf_format,
+                     address_size=8)
+            
             cu_die_offset = self.stream.tell()
             dwarf_assert(
                 self._is_supported_version(cu_header['version']),

diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py
index 536132b..0f3094c 100644
--- a/elftools/dwarf/structs.py
+++ b/elftools/dwarf/structs.py

@@ -8,8 +8,7 @@
 # This code is in the public domain
 #-------------------------------------------------------------------------------
 from ..construct import (
-    UBInt8, UBInt16, UBInt32, UBInt64,
-    ULInt8, ULInt16, ULInt32, ULInt64,
+    UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
     Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum,
     PrefixedArray, CString,
     )
@@ -19,7 +18,8 @@
 
 class DWARFStructs(object):
     """ Exposes Construct structs suitable for parsing information from DWARF 
-        sections. Configurable with endianity and format (32 or 64-bit)
+        sections. Each compile unit in DWARF info can have its own structs
+        object.
     
         Accessible attributes (mostly described by in chapter 7 of the DWARF
         spec v3):
@@ -30,6 +30,9 @@
             Dwarf_offset:
                 32-bit or 64-bit word, depending on dwarf_format
             
+            Dwarf_target_addr:
+                32-bit or 64-bit word, depending on address size
+            
             Dwarf_initial_length:
                 "Initial length field" encoding
                 section 7.4
@@ -51,10 +54,22 @@
         
         See also the documentation of public methods.
     """
-    def __init__(self, little_endian=True, dwarf_format=32):
+    def __init__(self, little_endian, dwarf_format, address_size):
+        """ little_endian:
+                True if the file is little endian, False if big
+            
+            dwarf_format:
+                DWARF Format: 32 or 64-bit (see spec section 7.4)
+            
+            address_size:
+                Target machine address size, in bytes (4 or 8). (See spec 
+                section 7.5.1)
+        """
         assert dwarf_format == 32 or dwarf_format == 64
+        assert address_size == 8 or address_size == 4
         self.little_endian = little_endian
-        self.dwarf_format = dwarf_format        
+        self.dwarf_format = dwarf_format  
+        self.address_size = address_size
         self._create_structs()
 
     def initial_lenght_field_size(self):
@@ -69,12 +84,16 @@
             self.Dwarf_uint32 = ULInt32
             self.Dwarf_uint64 = ULInt64
             self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
+            self.Dwarf_target_addr = (
+                ULInt32 if self.address_size == 4 else ULInt64)
         else:
             self.Dwarf_uint8 = UBInt8
             self.Dwarf_uint16 = UBInt16
             self.Dwarf_uint32 = UBInt32
             self.Dwarf_uint64 = UBInt64
             self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
+            self.Dwarf_target_addr = (
+                UBInt32 if self.address_size == 4 else UBInt64)
 
         self._create_initial_length()
         self._create_leb128()
@@ -120,7 +139,7 @@
 
     def _create_dw_form(self):
         self.Dwarf_dw_form = dict(
-            DW_FORM_addr=self.Dwarf_offset(''),
+            DW_FORM_addr=self.Dwarf_target_addr(''),
             
             DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
             DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),

diff --git a/z.py b/z.py
index 013a7c7..5e5e775 100644
--- a/z.py
+++ b/z.py

@@ -20,9 +20,15 @@
 print efile.has_dwarf_info()
 
 dwarfinfo = efile.get_dwarf_info()
+tt = dwarfinfo.structs.Dwarf_dw_form['DW_FORM_block1'].parse('\x03\x12\x34\x46')
 
-cu = dwarfinfo.get_CU(1)
-print cu.get_top_DIE()
+cu = dwarfinfo.get_CU(0)
+print 'CU header', cu.header
+topdie = cu.get_top_DIE()
+
+print topdie.size, topdie.tag
+for attrname, val in topdie.attributes.iteritems():
+    print attrname, val
 
 #~ print dwarfinfo.structs.Dwarf_abbrev_entry.parse('\x13\x01\x01\x03\x50\x04\x00\x00')
commit	0fb9a992d644fd4bcfd8b685285c327cab14ac73	[log] [tgz]
author	Eli Bendersky <eliben@gmail.com>	Fri Sep 23 11:00:34 2011 +0300
committer	Eli Bendersky <eliben@gmail.com>	Fri Sep 23 11:00:34 2011 +0300
tree	dae7a88f0204ae91db942f55eee5e311ec757c2e
parent	d87f1e5ffe135984172175239db34fc343adaf6c [diff]
parent	89a824fd227b7e006dbbc18cf09006bdaeb350e4 [diff]