Initial efforts porting pyelftools to Python 3. Some basic ELF functionality is working.

commit: eefac9a7102528afeb1e95475e921bd50e08471c [log] [tgz]
author: Eli Bendersky <eliben@gmail.com> Fri Jan 27 07:08:05 2012 +0200
committer: Eli Bendersky <eliben@gmail.com> Fri Jan 27 07:08:05 2012 +0200
tree: 25136396b81bad0538c182d23d1d12dfe7458df6
parent: 3853a40a7d3fa0dd03251b26a9334fa5c01aca18 [diff]
diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py
index d3e311a..53caa97 100644
--- a/elftools/common/construct_utils.py
+++ b/elftools/common/construct_utils.py

@@ -35,7 +35,7 @@
                 if self.predicate(subobj, context):
                     break
                 obj.append(subobj)
-        except ConstructError, ex:
+        except ConstructError as ex:
             raise ArrayError("missing terminator", ex)
         return obj
     def _build(self, obj, stream, context):

diff --git a/elftools/common/ordereddict.py b/elftools/common/ordereddict.py
index aabeafc..5e0f142 100644
--- a/elftools/common/ordereddict.py
+++ b/elftools/common/ordereddict.py

@@ -262,21 +262,3 @@
         "od.viewitems() -> a set-like object providing a view on od's items"

         return ItemsView(self)

 

-

-

-

-#-------------------------------------------------------------------------------

-if __name__ == "__main__":

-    od = OrderedDict()

-    d = dict()

-    

-    for key in ['joe', 'more', 'tem', 'opsdf', 'dsf']:

-        od[key] = d[key] = key + '1'

-    

-    for k in d:

-        print k, d[k]

-    

-    print '-------- ordered ----------'

-    

-    for k in od:

-        print k, od[k]


diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py
new file mode 100644
index 0000000..bd08bd2
--- /dev/null
+++ b/elftools/common/py3compat.py

@@ -0,0 +1,40 @@
+#-------------------------------------------------------------------------------
+# elftools: common/py3compat.py
+#
+# Python 3 compatibility code
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import sys
+PY3 = sys.version_info[0] == 3
+
+
+if PY3:
+    import io
+    StringIO = io.StringIO
+    BytesIO = io.BytesIO
+
+    import collections
+    OrderedDict = collections.OrderedDict
+
+    _iterkeys = "keys"
+    _iteritems = "items"
+else:
+    import cStringIO
+    StringIO = BytesIO = cStringIO.StringIO
+
+    from .ordereddict import OrderedDict
+
+    _iterkeys = "iterkeys"
+    _iteritems = "iteritems"
+
+
+def iterkeys(d):
+    """Return an iterator over the keys of a dictionary."""
+    return getattr(d, _iterkeys)()
+
+def iteritems(d):
+    """Return an iterator over the items of a dictionary."""
+    return getattr(d, _iteritems)()
+

diff --git a/elftools/construct/lib/utils.py b/elftools/construct/lib/utils.py
deleted file mode 100644
index 86d8b03..0000000
--- a/elftools/construct/lib/utils.py
+++ /dev/null

@@ -1,22 +0,0 @@
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
-
-
-try:
-    from struct import Struct as Packer
-except ImportError:
-    from struct import pack, unpack, calcsize
-    class Packer(object):
-        __slots__ = ["format", "size"]
-        def __init__(self, format):
-            self.format = format
-            self.size = calcsize(format)
-        def pack(self, *args):
-            return pack(self.format, *args)
-        def unpack(self, data):
-            return unpack(self.format, data)
-
-
-

diff --git a/elftools/construct/text.py b/elftools/construct/text.py
deleted file mode 100644
index e461dff..0000000
--- a/elftools/construct/text.py
+++ /dev/null

@@ -1,286 +0,0 @@
-from core import *
-from adapters import *
-from macros import *
-
-
-#===============================================================================
-# exceptions
-#===============================================================================
-class QuotedStringError(ConstructError):
-    __slots__ = []
-
-
-#===============================================================================
-# constructs
-#===============================================================================
-class QuotedString(Construct):
-    r"""
-    A quoted string (begins with an opening-quote, terminated by a 
-    closing-quote, which may be escaped by an escape character)
-    
-    Parameters:
-    * name - the name of the field
-    * start_quote - the opening quote character. default is '"'
-    * end_quote - the closing quote character. default is '"'
-    * esc_char - the escape character, or None to disable escaping. defualt
-      is "\" (backslash)
-    * encoding - the character encoding (e.g., "utf8"), or None to return
-      raw bytes. defualt is None.
-    * allow_eof - whether to allow EOF before the closing quote is matched.
-      if False, an exception will be raised when EOF is reached by the closing
-      quote is missing. default is False.
-    
-    Example:
-    QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None)
-    """
-    __slots__ = [
-        "start_quote", "end_quote", "char", "esc_char", "encoding", 
-        "allow_eof"
-    ]
-    def __init__(self, name, start_quote = '"', end_quote = None, 
-                 esc_char = '\\', encoding = None, allow_eof = False):
-        Construct.__init__(self, name)
-        if end_quote is None:
-            end_quote = start_quote
-        self.start_quote = Literal(start_quote)
-        self.char = Char("char")
-        self.end_quote = end_quote
-        self.esc_char = esc_char
-        self.encoding = encoding
-        self.allow_eof = allow_eof
-    
-    def _parse(self, stream, context):
-        self.start_quote._parse(stream, context)
-        text = []
-        escaped = False
-        try:
-            while True:
-                ch = self.char._parse(stream, context)
-                if ch == self.esc_char:
-                    if escaped:
-                        text.append(ch)
-                        escaped = False
-                    else:
-                        escaped = True
-                elif ch == self.end_quote and not escaped:
-                    break
-                else:
-                    text.append(ch)
-                    escaped = False
-        except FieldError:
-            if not self.allow_eof:
-                raise
-        text = "".join(text)
-        if self.encoding is not None:
-            text = text.decode(self.encoding)
-        return text
-    
-    def _build(self, obj, stream, context):
-        self.start_quote._build(None, stream, context)
-        if self.encoding:
-            obj = obj.encode(self.encoding)
-        for ch in obj:
-            if ch == self.esc_char:
-                self.char._build(self.esc_char, stream, context)
-            elif ch == self.end_quote:
-                if self.esc_char is None:
-                    raise QuotedStringError("found ending quote in data, "
-                        "but no escape char defined", ch)
-                else:
-                    self.char._build(self.esc_char, stream, context)
-            self.char._build(ch, stream, context)
-        self.char._build(self.end_quote, stream, context)
-    
-    def _sizeof(self, context):
-        raise SizeofError("can't calculate size")
-
-
-#===============================================================================
-# macros
-#===============================================================================
-class WhitespaceAdapter(Adapter):
-    """
-    Adapter for whitespace sequences; do not use directly.
-    See Whitespace.
-    
-    Parameters:
-    * subcon - the subcon to adapt
-    * build_char - the character used for encoding (building)
-    """
-    __slots__ = ["build_char"]
-    def __init__(self, subcon, build_char):
-        Adapter.__init__(self, subcon)
-        self.build_char = build_char
-    def _encode(self, obj, context):
-        return self.build_char
-    def _decode(self, obj, context):
-        return None
-
-def Whitespace(charset = " \t", optional = True):
-    """whitespace (space that is ignored between tokens). when building, the
-    first character of the charset is used.
-    * charset - the set of characters that are considered whitespace. default
-      is space and tab.
-    * optional - whether or not whitespace is optional. default is True.
-    """
-    con = CharOf(None, charset)
-    if optional:
-        con = OptionalGreedyRange(con)
-    else:
-        con = GreedyRange(con)
-    return WhitespaceAdapter(con, build_char = charset[0])
-
-def Literal(text):
-    """matches a literal string in the text
-    * text - the text (string) to match
-    """
-    return ConstAdapter(Field(None, len(text)), text)
-
-def Char(name):
-    """a one-byte character"""
-    return Field(name, 1)
-
-def CharOf(name, charset):
-    """matches only characters of a given charset
-    * name - the name of the field
-    * charset - the set of valid characters
-    """
-    return OneOf(Char(name), charset)
-
-def CharNoneOf(name, charset):
-    """matches only characters that do not belong to a given charset
-    * name - the name of the field
-    * charset - the set of invalid characters
-    """
-    return NoneOf(Char(name), charset)
-
-def Alpha(name):
-    """a letter character (A-Z, a-z)"""
-    return CharOf(name, set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'))
-
-def Digit(name):
-    """a digit character (0-9)"""
-    return CharOf(name, set('0123456789'))
-
-def AlphaDigit(name):
-    """an alphanumeric character (A-Z, a-z, 0-9)"""
-    return CharOf(name, set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
-
-def BinDigit(name):
-    """a binary digit (0-1)"""
-    return CharOf(name, set('01'))
-
-def HexDigit(name):
-    """a hexadecimal digit (0-9, A-F, a-f)"""
-    return CharOf(name, set('0123456789abcdefABCDEF'))
-
-def Word(name):
-    """a sequence of letters"""
-    return StringAdapter(GreedyRange(Alpha(name)))
-
-class TextualIntAdapter(Adapter):
-    """
-    Adapter for textual integers
-    
-    Parameters:
-    * subcon - the subcon to adapt
-    * radix - the base of the integer (decimal, hexadecimal, binary, ...)
-    * digits - the sequence of digits of that radix
-    """
-    __slots__ = ["radix", "digits"]
-    def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"):
-        Adapter.__init__(self, subcon)
-        if radix > len(digits):
-            raise ValueError("not enough digits for radix %d" % (radix,))
-        self.radix = radix
-        self.digits = digits
-    def _encode(self, obj, context):
-        chars = []
-        if obj < 0:
-            chars.append("-")
-            n = -obj
-        else:
-            n = obj
-        r = self.radix
-        digs = self.digits
-        while n > 0:
-            n, d = divmod(n, r)
-            chars.append(digs[d])
-        # obj2 = "".join(reversed(chars))
-        # filler = digs[0] * (self._sizeof(context) - len(obj2))
-        # return filler + obj2
-        return "".join(reversed(chars))
-    def _decode(self, obj, context):
-        return int("".join(obj), self.radix)
-
-def DecNumber(name):
-    """decimal number"""
-    return TextualIntAdapter(GreedyRange(Digit(name)))
-
-def BinNumber(name):
-    """binary number"""
-    return TextualIntAdapter(GreedyRange(Digit(name)), 2)
-
-def HexNumber(name):
-    """hexadecimal number"""
-    return TextualIntAdapter(GreedyRange(Digit(name)), 16)
-
-def StringUpto(name, charset):
-    """a string that stretches up to a terminator, or EOF. unlike CString, 
-    StringUpto will no consume the terminator char.
-    * name - the name of the field
-    * charset - the set of terminator characters"""
-    return StringAdapter(OptionalGreedyRange(CharNoneOf(name, charset)))
-
-def Line(name):
-    r"""a textual line (up to "\n")"""
-    return StringUpto(name, "\n")
-
-class IdentifierAdapter(Adapter):
-    """
-    Adapter for programmatic identifiers
-    
-    Parameters:
-    * subcon - the subcon to adapt
-    """
-    def _encode(self, obj, context):
-        return obj[0], obj[1:]
-    def _decode(self, obj, context):
-        return obj[0] + "".join(obj[1])
-
-def Identifier(name, 
-               headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"), 
-               tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
-    ):
-    """a programmatic identifier (symbol). must start with a char of headset,
-    followed by a sequence of tailset characters
-    * name - the name of the field
-    * headset - charset for the first character. default is A-Z, a-z, and _
-    * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _
-    """
-    return IdentifierAdapter(
-        Sequence(name,
-            CharOf("head", headset),
-            OptionalGreedyRange(CharOf("tail", tailset)),
-        )
-    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py
index f9a4500..de67a43 100644
--- a/elftools/dwarf/callframe.py
+++ b/elftools/dwarf/callframe.py

@@ -9,6 +9,7 @@
 import copy
 from collections import namedtuple
 from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos)
+from ..common.py3compat import iterkeys
 from .structs import DWARFStructs
 from .constants import * 
 
@@ -434,7 +435,7 @@
 # for DW_CFA_* instructions, and mapping their values to names. Since all
 # names were imported from constants with `import *`, we look in globals()
 _OPCODE_NAME_MAP = {}
-for name in list(globals().iterkeys()):
+for name in list(iterkeys(globals())):
     if name.startswith('DW_CFA'):
         _OPCODE_NAME_MAP[globals()[name]] = name
 

diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py
index fe4537b..f0b5eb8 100644
--- a/elftools/dwarf/die.py
+++ b/elftools/dwarf/die.py

@@ -8,7 +8,7 @@
 #-------------------------------------------------------------------------------
 from collections import namedtuple
 
-from ..common.ordereddict import OrderedDict
+from ..common.py3compat import OrderedDict
 from ..common.utils import struct_parse, preserve_stream_pos
 
 

diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py
index eccd486..267c1c7 100644
--- a/elftools/dwarf/dwarf_expr.py
+++ b/elftools/dwarf/dwarf_expr.py

@@ -6,8 +6,7 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
-from cStringIO import StringIO
-
+from ..common.py3compat import StringIO, iteritems
 from ..common.utils import struct_parse, bytelist2string
 
 
@@ -86,7 +85,7 @@
 _generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_breg', 0, 31, 0x70)
 
 # opcode -> name mapping
-DW_OP_opcode2name = dict((v, k) for k, v in DW_OP_name2opcode.iteritems())
+DW_OP_opcode2name = dict((v, k) for k, v in iteritems(DW_OP_name2opcode))
 
 
 class GenericExprVisitor(object):

diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py
index dcec555..23349b4 100644
--- a/elftools/elf/elffile.py
+++ b/elftools/elf/elffile.py

@@ -6,7 +6,7 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
-from cStringIO import StringIO
+from ..common.py3compat import StringIO
 from ..common.exceptions import ELFError
 from ..common.utils import struct_parse, elf_assert
 from ..construct import ConstructError
@@ -26,7 +26,8 @@
         Accessible attributes:
 
             stream:
-                The stream holding the data of the file
+                The stream holding the data of the file - must be a binary
+                stream (bytes, not string).
 
             elfclass: 
                 32 or 64 - specifies the word size of the target machine
@@ -173,20 +174,20 @@
         #
         self.stream.seek(0)
         magic = self.stream.read(4)
-        elf_assert(magic == '\x7fELF', 'Magic number does not match')
+        elf_assert(magic == b'\x7fELF', 'Magic number does not match')
 
         ei_class = self.stream.read(1)
-        if ei_class == '\x01':
+        if ei_class == b'\x01':
             self.elfclass = 32
-        elif ei_class == '\x02':
+        elif ei_class == b'\x02':
             self.elfclass = 64
         else:
             raise ELFError('Invalid EI_CLASS %s' % repr(ei_class))
 
         ei_data = self.stream.read(1)
-        if ei_data == '\x01':
+        if ei_data == b'\x01':
             self.little_endian = True
-        elif ei_data == '\x02':
+        elif ei_data == b'\x02':
             self.little_endian = False
         else:
             raise ELFError('Invalid EI_DATA %s' % repr(ei_data))

diff --git a/z.py b/z.py
index 02d4f3f..d30feb3 100644
--- a/z.py
+++ b/z.py

@@ -1,6 +1,7 @@
 # Just a script for playing around with pyelftools during testing
 # please ignore it!
 #
+from __future__ import print_function
 
 import sys, pprint
 from elftools.elf.structs import ELFStructs
@@ -13,18 +14,18 @@
 stream = open('test/testfiles/exe_simple64.elf', 'rb')
 
 efile = ELFFile(stream)
-print 'elfclass', efile.elfclass
-print '===> %s sections!' % efile.num_sections() 
-print efile.header
+print('elfclass', efile.elfclass)
+print('===> %s sections!' % efile.num_sections())
+print(efile.header)
 
 dinfo = efile.get_dwarf_info()
 from elftools.dwarf.locationlists import LocationLists
 from elftools.dwarf.descriptions import describe_DWARF_expr
 llists = LocationLists(dinfo.debug_loc_sec.stream, dinfo.structs)
 for loclist in llists.iter_location_lists():
-    print '----> loclist!'
+    print('----> loclist!')
     for li in loclist:
-        print li
-        print describe_DWARF_expr(li.loc_expr, dinfo.structs)
+        print(li)
+        print(describe_DWARF_expr(li.loc_expr, dinfo.structs))
commit	eefac9a7102528afeb1e95475e921bd50e08471c	[log] [tgz]
author	Eli Bendersky <eliben@gmail.com>	Fri Jan 27 07:08:05 2012 +0200
committer	Eli Bendersky <eliben@gmail.com>	Fri Jan 27 07:08:05 2012 +0200
tree	25136396b81bad0538c182d23d1d12dfe7458df6
parent	3853a40a7d3fa0dd03251b26a9334fa5c01aca18 [diff]