Initial efforts porting pyelftools to Python 3; some basic ELF functionality is working.
diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py
index d3e311a..53caa97 100644
--- a/elftools/common/construct_utils.py
+++ b/elftools/common/construct_utils.py
@@ -35,7 +35,7 @@
if self.predicate(subobj, context):
break
obj.append(subobj)
- except ConstructError, ex:
+ except ConstructError as ex:
raise ArrayError("missing terminator", ex)
return obj
def _build(self, obj, stream, context):
diff --git a/elftools/common/ordereddict.py b/elftools/common/ordereddict.py
index aabeafc..5e0f142 100644
--- a/elftools/common/ordereddict.py
+++ b/elftools/common/ordereddict.py
@@ -262,21 +262,3 @@
"od.viewitems() -> a set-like object providing a view on od's items"
return ItemsView(self)
-
-
-
-#-------------------------------------------------------------------------------
-if __name__ == "__main__":
- od = OrderedDict()
- d = dict()
-
- for key in ['joe', 'more', 'tem', 'opsdf', 'dsf']:
- od[key] = d[key] = key + '1'
-
- for k in d:
- print k, d[k]
-
- print '-------- ordered ----------'
-
- for k in od:
- print k, od[k]
diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py
new file mode 100644
index 0000000..bd08bd2
--- /dev/null
+++ b/elftools/common/py3compat.py
@@ -0,0 +1,40 @@
+#-------------------------------------------------------------------------------
+# elftools: common/py3compat.py
+#
+# Python 3 compatibility code
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import sys
+PY3 = sys.version_info[0] == 3
+
+
+if PY3:
+ import io
+ StringIO = io.StringIO
+ BytesIO = io.BytesIO
+
+ import collections
+ OrderedDict = collections.OrderedDict
+
+ _iterkeys = "keys"
+ _iteritems = "items"
+else:
+ import cStringIO
+ StringIO = BytesIO = cStringIO.StringIO
+
+ from .ordereddict import OrderedDict
+
+ _iterkeys = "iterkeys"
+ _iteritems = "iteritems"
+
+
+def iterkeys(d):
+    """Return a true iterator over the keys of dict d (on Python 3, keys() is only a view)."""
+    return iter(getattr(d, _iterkeys)())
+
+def iteritems(d):
+    """Return a true iterator over the (key, value) pairs of dict d (on Python 3, items() is only a view)."""
+    return iter(getattr(d, _iteritems)())
+
diff --git a/elftools/construct/lib/utils.py b/elftools/construct/lib/utils.py
deleted file mode 100644
index 86d8b03..0000000
--- a/elftools/construct/lib/utils.py
+++ /dev/null
@@ -1,22 +0,0 @@
-try:
- from cStringIO import StringIO
-except ImportError:
- from StringIO import StringIO
-
-
-try:
- from struct import Struct as Packer
-except ImportError:
- from struct import pack, unpack, calcsize
- class Packer(object):
- __slots__ = ["format", "size"]
- def __init__(self, format):
- self.format = format
- self.size = calcsize(format)
- def pack(self, *args):
- return pack(self.format, *args)
- def unpack(self, data):
- return unpack(self.format, data)
-
-
-
diff --git a/elftools/construct/text.py b/elftools/construct/text.py
deleted file mode 100644
index e461dff..0000000
--- a/elftools/construct/text.py
+++ /dev/null
@@ -1,286 +0,0 @@
-from core import *
-from adapters import *
-from macros import *
-
-
-#===============================================================================
-# exceptions
-#===============================================================================
-class QuotedStringError(ConstructError):
- __slots__ = []
-
-
-#===============================================================================
-# constructs
-#===============================================================================
-class QuotedString(Construct):
- r"""
- A quoted string (begins with an opening-quote, terminated by a
- closing-quote, which may be escaped by an escape character)
-
- Parameters:
- * name - the name of the field
- * start_quote - the opening quote character. default is '"'
- * end_quote - the closing quote character. default is '"'
- * esc_char - the escape character, or None to disable escaping. defualt
- is "\" (backslash)
- * encoding - the character encoding (e.g., "utf8"), or None to return
- raw bytes. defualt is None.
- * allow_eof - whether to allow EOF before the closing quote is matched.
- if False, an exception will be raised when EOF is reached by the closing
- quote is missing. default is False.
-
- Example:
- QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None)
- """
- __slots__ = [
- "start_quote", "end_quote", "char", "esc_char", "encoding",
- "allow_eof"
- ]
- def __init__(self, name, start_quote = '"', end_quote = None,
- esc_char = '\\', encoding = None, allow_eof = False):
- Construct.__init__(self, name)
- if end_quote is None:
- end_quote = start_quote
- self.start_quote = Literal(start_quote)
- self.char = Char("char")
- self.end_quote = end_quote
- self.esc_char = esc_char
- self.encoding = encoding
- self.allow_eof = allow_eof
-
- def _parse(self, stream, context):
- self.start_quote._parse(stream, context)
- text = []
- escaped = False
- try:
- while True:
- ch = self.char._parse(stream, context)
- if ch == self.esc_char:
- if escaped:
- text.append(ch)
- escaped = False
- else:
- escaped = True
- elif ch == self.end_quote and not escaped:
- break
- else:
- text.append(ch)
- escaped = False
- except FieldError:
- if not self.allow_eof:
- raise
- text = "".join(text)
- if self.encoding is not None:
- text = text.decode(self.encoding)
- return text
-
- def _build(self, obj, stream, context):
- self.start_quote._build(None, stream, context)
- if self.encoding:
- obj = obj.encode(self.encoding)
- for ch in obj:
- if ch == self.esc_char:
- self.char._build(self.esc_char, stream, context)
- elif ch == self.end_quote:
- if self.esc_char is None:
- raise QuotedStringError("found ending quote in data, "
- "but no escape char defined", ch)
- else:
- self.char._build(self.esc_char, stream, context)
- self.char._build(ch, stream, context)
- self.char._build(self.end_quote, stream, context)
-
- def _sizeof(self, context):
- raise SizeofError("can't calculate size")
-
-
-#===============================================================================
-# macros
-#===============================================================================
-class WhitespaceAdapter(Adapter):
- """
- Adapter for whitespace sequences; do not use directly.
- See Whitespace.
-
- Parameters:
- * subcon - the subcon to adapt
- * build_char - the character used for encoding (building)
- """
- __slots__ = ["build_char"]
- def __init__(self, subcon, build_char):
- Adapter.__init__(self, subcon)
- self.build_char = build_char
- def _encode(self, obj, context):
- return self.build_char
- def _decode(self, obj, context):
- return None
-
-def Whitespace(charset = " \t", optional = True):
- """whitespace (space that is ignored between tokens). when building, the
- first character of the charset is used.
- * charset - the set of characters that are considered whitespace. default
- is space and tab.
- * optional - whether or not whitespace is optional. default is True.
- """
- con = CharOf(None, charset)
- if optional:
- con = OptionalGreedyRange(con)
- else:
- con = GreedyRange(con)
- return WhitespaceAdapter(con, build_char = charset[0])
-
-def Literal(text):
- """matches a literal string in the text
- * text - the text (string) to match
- """
- return ConstAdapter(Field(None, len(text)), text)
-
-def Char(name):
- """a one-byte character"""
- return Field(name, 1)
-
-def CharOf(name, charset):
- """matches only characters of a given charset
- * name - the name of the field
- * charset - the set of valid characters
- """
- return OneOf(Char(name), charset)
-
-def CharNoneOf(name, charset):
- """matches only characters that do not belong to a given charset
- * name - the name of the field
- * charset - the set of invalid characters
- """
- return NoneOf(Char(name), charset)
-
-def Alpha(name):
- """a letter character (A-Z, a-z)"""
- return CharOf(name, set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'))
-
-def Digit(name):
- """a digit character (0-9)"""
- return CharOf(name, set('0123456789'))
-
-def AlphaDigit(name):
- """an alphanumeric character (A-Z, a-z, 0-9)"""
- return CharOf(name, set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
-
-def BinDigit(name):
- """a binary digit (0-1)"""
- return CharOf(name, set('01'))
-
-def HexDigit(name):
- """a hexadecimal digit (0-9, A-F, a-f)"""
- return CharOf(name, set('0123456789abcdefABCDEF'))
-
-def Word(name):
- """a sequence of letters"""
- return StringAdapter(GreedyRange(Alpha(name)))
-
-class TextualIntAdapter(Adapter):
- """
- Adapter for textual integers
-
- Parameters:
- * subcon - the subcon to adapt
- * radix - the base of the integer (decimal, hexadecimal, binary, ...)
- * digits - the sequence of digits of that radix
- """
- __slots__ = ["radix", "digits"]
- def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"):
- Adapter.__init__(self, subcon)
- if radix > len(digits):
- raise ValueError("not enough digits for radix %d" % (radix,))
- self.radix = radix
- self.digits = digits
- def _encode(self, obj, context):
- chars = []
- if obj < 0:
- chars.append("-")
- n = -obj
- else:
- n = obj
- r = self.radix
- digs = self.digits
- while n > 0:
- n, d = divmod(n, r)
- chars.append(digs[d])
- # obj2 = "".join(reversed(chars))
- # filler = digs[0] * (self._sizeof(context) - len(obj2))
- # return filler + obj2
- return "".join(reversed(chars))
- def _decode(self, obj, context):
- return int("".join(obj), self.radix)
-
-def DecNumber(name):
- """decimal number"""
- return TextualIntAdapter(GreedyRange(Digit(name)))
-
-def BinNumber(name):
- """binary number"""
- return TextualIntAdapter(GreedyRange(Digit(name)), 2)
-
-def HexNumber(name):
- """hexadecimal number"""
- return TextualIntAdapter(GreedyRange(Digit(name)), 16)
-
-def StringUpto(name, charset):
- """a string that stretches up to a terminator, or EOF. unlike CString,
- StringUpto will no consume the terminator char.
- * name - the name of the field
- * charset - the set of terminator characters"""
- return StringAdapter(OptionalGreedyRange(CharNoneOf(name, charset)))
-
-def Line(name):
- r"""a textual line (up to "\n")"""
- return StringUpto(name, "\n")
-
-class IdentifierAdapter(Adapter):
- """
- Adapter for programmatic identifiers
-
- Parameters:
- * subcon - the subcon to adapt
- """
- def _encode(self, obj, context):
- return obj[0], obj[1:]
- def _decode(self, obj, context):
- return obj[0] + "".join(obj[1])
-
-def Identifier(name,
- headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"),
- tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
- ):
- """a programmatic identifier (symbol). must start with a char of headset,
- followed by a sequence of tailset characters
- * name - the name of the field
- * headset - charset for the first character. default is A-Z, a-z, and _
- * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _
- """
- return IdentifierAdapter(
- Sequence(name,
- CharOf("head", headset),
- OptionalGreedyRange(CharOf("tail", tailset)),
- )
- )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py
index f9a4500..de67a43 100644
--- a/elftools/dwarf/callframe.py
+++ b/elftools/dwarf/callframe.py
@@ -9,6 +9,7 @@
import copy
from collections import namedtuple
from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos)
+from ..common.py3compat import iterkeys
from .structs import DWARFStructs
from .constants import *
@@ -434,7 +435,7 @@
# for DW_CFA_* instructions, and mapping their values to names. Since all
# names were imported from constants with `import *`, we look in globals()
_OPCODE_NAME_MAP = {}
-for name in list(globals().iterkeys()):
+for name in list(iterkeys(globals())):
if name.startswith('DW_CFA'):
_OPCODE_NAME_MAP[globals()[name]] = name
diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py
index fe4537b..f0b5eb8 100644
--- a/elftools/dwarf/die.py
+++ b/elftools/dwarf/die.py
@@ -8,7 +8,7 @@
#-------------------------------------------------------------------------------
from collections import namedtuple
-from ..common.ordereddict import OrderedDict
+from ..common.py3compat import OrderedDict
from ..common.utils import struct_parse, preserve_stream_pos
diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py
index eccd486..267c1c7 100644
--- a/elftools/dwarf/dwarf_expr.py
+++ b/elftools/dwarf/dwarf_expr.py
@@ -6,8 +6,7 @@
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
-from cStringIO import StringIO
-
+from ..common.py3compat import StringIO, iteritems
from ..common.utils import struct_parse, bytelist2string
@@ -86,7 +85,7 @@
_generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_breg', 0, 31, 0x70)
# opcode -> name mapping
-DW_OP_opcode2name = dict((v, k) for k, v in DW_OP_name2opcode.iteritems())
+DW_OP_opcode2name = dict((v, k) for k, v in iteritems(DW_OP_name2opcode))
class GenericExprVisitor(object):
diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py
index dcec555..23349b4 100644
--- a/elftools/elf/elffile.py
+++ b/elftools/elf/elffile.py
@@ -6,7 +6,7 @@
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
-from cStringIO import StringIO
+from ..common.py3compat import StringIO
from ..common.exceptions import ELFError
from ..common.utils import struct_parse, elf_assert
from ..construct import ConstructError
@@ -26,7 +26,8 @@
Accessible attributes:
stream:
- The stream holding the data of the file
+ The stream holding the data of the file - must be a binary
+ stream (bytes, not string).
elfclass:
32 or 64 - specifies the word size of the target machine
@@ -173,20 +174,20 @@
#
self.stream.seek(0)
magic = self.stream.read(4)
- elf_assert(magic == '\x7fELF', 'Magic number does not match')
+ elf_assert(magic == b'\x7fELF', 'Magic number does not match')
ei_class = self.stream.read(1)
- if ei_class == '\x01':
+ if ei_class == b'\x01':
self.elfclass = 32
- elif ei_class == '\x02':
+ elif ei_class == b'\x02':
self.elfclass = 64
else:
raise ELFError('Invalid EI_CLASS %s' % repr(ei_class))
ei_data = self.stream.read(1)
- if ei_data == '\x01':
+ if ei_data == b'\x01':
self.little_endian = True
- elif ei_data == '\x02':
+ elif ei_data == b'\x02':
self.little_endian = False
else:
raise ELFError('Invalid EI_DATA %s' % repr(ei_data))
diff --git a/z.py b/z.py
index 02d4f3f..d30feb3 100644
--- a/z.py
+++ b/z.py
@@ -1,6 +1,7 @@
# Just a script for playing around with pyelftools during testing
# please ignore it!
#
+from __future__ import print_function
import sys, pprint
from elftools.elf.structs import ELFStructs
@@ -13,18 +14,18 @@
stream = open('test/testfiles/exe_simple64.elf', 'rb')
efile = ELFFile(stream)
-print 'elfclass', efile.elfclass
-print '===> %s sections!' % efile.num_sections()
-print efile.header
+print('elfclass', efile.elfclass)
+print('===> %s sections!' % efile.num_sections())
+print(efile.header)
dinfo = efile.get_dwarf_info()
from elftools.dwarf.locationlists import LocationLists
from elftools.dwarf.descriptions import describe_DWARF_expr
llists = LocationLists(dinfo.debug_loc_sec.stream, dinfo.structs)
for loclist in llists.iter_location_lists():
- print '----> loclist!'
+ print('----> loclist!')
for li in loclist:
- print li
- print describe_DWARF_expr(li.loc_expr, dinfo.structs)
+ print(li)
+ print(describe_DWARF_expr(li.loc_expr, dinfo.structs))