# elftools/construct/text.py - mirrors/cros/chromiumos/third_party/pyelftools - Git at Google

 from core import *
 from adapters import *
 from macros import *


 #===============================================================================
 # exceptions
 #===============================================================================
 class QuotedStringError(ConstructError):
     """
     Error raised by QuotedString when the data to build contains the closing
     quote but no escape character is configured.
     """
     __slots__ = []


 #===============================================================================
 # constructs
 #===============================================================================
 class QuotedString(Construct):
     r"""
     A quoted string: an opening quote, a run of characters, and a closing
     quote. Inside the string, the closing quote (or the escape character
     itself) is written literally by prefixing it with the escape character.

     Parameters:
     * name - the name of the field
     * start_quote - the opening quote. default is '"'
     * end_quote - the closing quote character, or None to reuse start_quote.
       default is None
     * esc_char - the escape character, or None to disable escaping. default
       is "\" (backslash)
     * encoding - the character encoding (e.g., "utf8") used to decode the
       parsed text and to encode the text being built, or None to work with
       raw bytes. default is None.
     * allow_eof - whether to allow EOF before the closing quote is matched.
       if False, an exception will be raised when EOF is reached but the
       closing quote is missing. default is False.

     Example:
     QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None)
     """
     __slots__ = [
         "start_quote", "end_quote", "char", "esc_char", "encoding",
         "allow_eof",
     ]

     def __init__(self, name, start_quote = '"', end_quote = None,
                  esc_char = '\\', encoding = None, allow_eof = False):
         Construct.__init__(self, name)
         self.start_quote = Literal(start_quote)
         self.char = Char("char")
         # with no explicit closing quote, the opening quote closes the string
         if end_quote is not None:
             self.end_quote = end_quote
         else:
             self.end_quote = start_quote
         self.esc_char = esc_char
         self.encoding = encoding
         self.allow_eof = allow_eof

     def _parse(self, stream, context):
         """Read the opening quote, then characters up to the closing quote."""
         self.start_quote._parse(stream, context)
         pieces = []
         after_escape = False
         try:
             while True:
                 ch = self.char._parse(stream, context)
                 if after_escape:
                     # whatever follows the escape character is kept as-is,
                     # including the escape character and the closing quote
                     pieces.append(ch)
                     after_escape = False
                 elif ch == self.esc_char:
                     after_escape = True
                 elif ch == self.end_quote:
                     break
                 else:
                     pieces.append(ch)
         except FieldError:
             # a failed character read (presumably EOF) is tolerated only
             # when allow_eof is set
             if not self.allow_eof:
                 raise
         result = "".join(pieces)
         if self.encoding is None:
             return result
         return result.decode(self.encoding)

     def _build(self, obj, stream, context):
         """Write the quotes around obj, escaping where required."""
         self.start_quote._build(None, stream, context)
         if self.encoding:
             obj = obj.encode(self.encoding)
         for ch in obj:
             needs_escape = False
             if ch == self.esc_char:
                 needs_escape = True
             elif ch == self.end_quote:
                 # a closing quote inside the data can only be represented
                 # with the help of an escape character
                 if self.esc_char is None:
                     raise QuotedStringError("found ending quote in data, "
                         "but no escape char defined", ch)
                 needs_escape = True
             if needs_escape:
                 self.char._build(self.esc_char, stream, context)
             self.char._build(ch, stream, context)
         self.char._build(self.end_quote, stream, context)

     def _sizeof(self, context):
         # the length depends on the data, so there is no fixed size
         raise SizeofError("can't calculate size")


 #===============================================================================
 # macros
 #===============================================================================
 class WhitespaceAdapter(Adapter):
     """
     Adapter for whitespace sequences; not meant to be used directly, see
     Whitespace. Decoding discards the parsed whitespace (yields None) and
     encoding always produces build_char.

     Parameters:
     * subcon - the subcon to adapt
     * build_char - the character used for encoding (building)
     """
     __slots__ = ["build_char"]

     def __init__(self, subcon, build_char):
         Adapter.__init__(self, subcon)
         self.build_char = build_char

     def _encode(self, obj, context):
         # the object being built is ignored; whitespace is always build_char
         return self.build_char

     def _decode(self, obj, context):
         # parsed whitespace carries no value
         return None

 def Whitespace(charset = " \t", optional = True):
     """whitespace (space that is ignored between tokens). when building, the
     first character of the charset is used.
     * charset - the characters that count as whitespace. default is space
       and tab.
     * optional - whether or not whitespace is optional. default is True.
     """
     single = CharOf(None, charset)
     if not optional:
         repeated = GreedyRange(single)
     else:
         repeated = OptionalGreedyRange(single)
     return WhitespaceAdapter(repeated, build_char = charset[0])

 def Literal(text):
     """matches a literal string in the text
     * text - the text (string) to match
     """
     # an unnamed field exactly as long as the expected text
     raw = Field(None, len(text))
     return ConstAdapter(raw, text)

 def Char(name):
     """a single character (a field of length 1)
     * name - the name of the field
     """
     length = 1
     return Field(name, length)

 def CharOf(name, charset):
     """a character that must belong to a given charset
     * name - the name of the field
     * charset - the set of valid characters
     """
     single = Char(name)
     return OneOf(single, charset)

 def CharNoneOf(name, charset):
     """a character that must not belong to a given charset
     * name - the name of the field
     * charset - the set of invalid characters
     """
     single = Char(name)
     return NoneOf(single, charset)

 def Alpha(name):
     """a single letter character (A-Z, a-z)
     * name - the name of the field
     """
     lower = 'abcdefghijklmnopqrstuvwxyz'
     upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
     return CharOf(name, set(lower + upper))

 def Digit(name):
     """a single decimal digit character (0-9)
     * name - the name of the field
     """
     decimal_digits = set('0123456789')
     return CharOf(name, decimal_digits)

 def AlphaDigit(name):
     """a single alphanumeric character (A-Z, a-z, 0-9)
     * name - the name of the field
     """
     digits = "0123456789"
     lower = "abcdefghijklmnopqrstuvwxyz"
     upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     return CharOf(name, set(digits + lower + upper))

 def BinDigit(name):
     """a single binary digit character (0 or 1)
     * name - the name of the field
     """
     binary_digits = set(('0', '1'))
     return CharOf(name, binary_digits)

 def HexDigit(name):
     """a single hexadecimal digit character (0-9, A-F, a-f)
     * name - the name of the field
     """
     hex_digits = set('0123456789' + 'abcdef' + 'ABCDEF')
     return CharOf(name, hex_digits)

 def Word(name):
     """a sequence of letters (a greedy run of Alpha characters)
     * name - the name of the field
     """
     letters = GreedyRange(Alpha(name))
     return StringAdapter(letters)

 class TextualIntAdapter(Adapter):
     """
     Adapter for textual integers: converts between an integer and the
     sequence of digit characters handled by the subcon.

     Parameters:
     * subcon - the subcon to adapt
     * radix - the base of the integer (decimal, hexadecimal, binary, ...).
       default is 10
     * digits - the sequence of digits of that radix, used when encoding.
       default is "0123456789abcdef"

     Raises ValueError if digits has fewer characters than the radix needs.
     """
     __slots__ = ["radix", "digits"]
     def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"):
         Adapter.__init__(self, subcon)
         if radix > len(digits):
             raise ValueError("not enough digits for radix %d" % (radix,))
         self.radix = radix
         self.digits = digits
     def _encode(self, obj, context):
         """Return the integer obj written out in self.radix.

         Zero is encoded as the first digit, and negative numbers get a
         leading "-".
         """
         r = self.radix
         if r < 2:
             # divmod() below would never terminate (radix 1) or would
             # divide by zero (radix 0)
             raise ValueError("cannot encode with radix %d" % (r,))
         digs = self.digits
         if obj < 0:
             n = -obj
         else:
             n = obj
         # digits are collected least-significant first and reversed at the end
         chars = []
         while n > 0:
             n, d = divmod(n, r)
             chars.append(digs[d])
         if not chars:
             # the loop emits nothing for zero, which still needs one digit
             chars.append(digs[0])
         if obj < 0:
             # appended last so that the sign comes first after reversing
             chars.append("-")
         return "".join(reversed(chars))
     def _decode(self, obj, context):
         """Join the parsed digit characters and convert them with int()."""
         return int("".join(obj), self.radix)

 def DecNumber(name):
     """decimal number (a greedy run of decimal digits)
     * name - the name of the field
     """
     digit_run = GreedyRange(Digit(name))
     return TextualIntAdapter(digit_run)

 def BinNumber(name):
     """binary number (a greedy run of the digits 0 and 1)
     * name - the name of the field
     """
     # only binary digits may be consumed: a 2-9 swallowed here would make
     # the base-2 conversion in TextualIntAdapter fail
     return TextualIntAdapter(GreedyRange(BinDigit(name)), 2)

 def HexNumber(name):
     """hexadecimal number (a greedy run of the digits 0-9, a-f, A-F)
     * name - the name of the field
     """
     # hex digits, not just 0-9, so that the letters a-f/A-F are accepted
     return TextualIntAdapter(GreedyRange(HexDigit(name)), 16)

 def StringUpto(name, charset):
     """a string that stretches up to a terminator, or EOF. unlike CString,
     StringUpto will not consume the terminator char.
     * name - the name of the field
     * charset - the set of terminator characters"""
     non_terminator = CharNoneOf(name, charset)
     body = OptionalGreedyRange(non_terminator)
     return StringAdapter(body)

 def Line(name):
     r"""a textual line (everything up to, but not including, "\n")
     * name - the name of the field
     """
     newline = "\n"
     return StringUpto(name, newline)

 class IdentifierAdapter(Adapter):
     """
     Adapter for programmatic identifiers. Converts between the identifier
     string and the (head, tail) pair handled by the subcon.

     Parameters:
     * subcon - the subcon to adapt
     """
     def _encode(self, obj, context):
         # split into the leading character and the remaining characters
         head = obj[0]
         tail = obj[1:]
         return head, tail

     def _decode(self, obj, context):
         # obj[0] is the head character, obj[1] the sequence of tail characters
         head = obj[0]
         tail = "".join(obj[1])
         return head + tail

 def Identifier(name,
                headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"),
                tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
     ):
     """a programmatic identifier (symbol): one character of headset followed
     by a (possibly empty) sequence of tailset characters
     * name - the name of the field
     * headset - charset for the first character. default is A-Z, a-z, and _
     * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _
     """
     head = CharOf("head", headset)
     tail = OptionalGreedyRange(CharOf("tail", tailset))
     pair = Sequence(name, head, tail)
     return IdentifierAdapter(pair)
	from core import *
	from adapters import *
	from macros import *


	#===============================================================================
	# exceptions
	#===============================================================================
	class QuotedStringError(ConstructError):
	    """
	    Error raised by QuotedString when the data to build contains the closing
	    quote but no escape character is configured.
	    """
	    __slots__ = []


	#===============================================================================
	# constructs
	#===============================================================================
	class QuotedString(Construct):
	    r"""
	    A quoted string: an opening quote, a run of characters, and a closing
	    quote. Inside the string, the closing quote (or the escape character
	    itself) is written literally by prefixing it with the escape character.

	    Parameters:
	    * name - the name of the field
	    * start_quote - the opening quote. default is '"'
	    * end_quote - the closing quote character, or None to reuse start_quote.
	      default is None
	    * esc_char - the escape character, or None to disable escaping. default
	      is "\" (backslash)
	    * encoding - the character encoding (e.g., "utf8") used to decode the
	      parsed text and to encode the text being built, or None to work with
	      raw bytes. default is None.
	    * allow_eof - whether to allow EOF before the closing quote is matched.
	      if False, an exception will be raised when EOF is reached but the
	      closing quote is missing. default is False.

	    Example:
	    QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None)
	    """
	    __slots__ = [
	        "start_quote", "end_quote", "char", "esc_char", "encoding",
	        "allow_eof",
	    ]

	    def __init__(self, name, start_quote = '"', end_quote = None,
	                 esc_char = '\\', encoding = None, allow_eof = False):
	        Construct.__init__(self, name)
	        self.start_quote = Literal(start_quote)
	        self.char = Char("char")
	        # with no explicit closing quote, the opening quote closes the string
	        if end_quote is not None:
	            self.end_quote = end_quote
	        else:
	            self.end_quote = start_quote
	        self.esc_char = esc_char
	        self.encoding = encoding
	        self.allow_eof = allow_eof

	    def _parse(self, stream, context):
	        """Read the opening quote, then characters up to the closing quote."""
	        self.start_quote._parse(stream, context)
	        pieces = []
	        after_escape = False
	        try:
	            while True:
	                ch = self.char._parse(stream, context)
	                if after_escape:
	                    # whatever follows the escape character is kept as-is,
	                    # including the escape character and the closing quote
	                    pieces.append(ch)
	                    after_escape = False
	                elif ch == self.esc_char:
	                    after_escape = True
	                elif ch == self.end_quote:
	                    break
	                else:
	                    pieces.append(ch)
	        except FieldError:
	            # a failed character read (presumably EOF) is tolerated only
	            # when allow_eof is set
	            if not self.allow_eof:
	                raise
	        result = "".join(pieces)
	        if self.encoding is None:
	            return result
	        return result.decode(self.encoding)

	    def _build(self, obj, stream, context):
	        """Write the quotes around obj, escaping where required."""
	        self.start_quote._build(None, stream, context)
	        if self.encoding:
	            obj = obj.encode(self.encoding)
	        for ch in obj:
	            needs_escape = False
	            if ch == self.esc_char:
	                needs_escape = True
	            elif ch == self.end_quote:
	                # a closing quote inside the data can only be represented
	                # with the help of an escape character
	                if self.esc_char is None:
	                    raise QuotedStringError("found ending quote in data, "
	                        "but no escape char defined", ch)
	                needs_escape = True
	            if needs_escape:
	                self.char._build(self.esc_char, stream, context)
	            self.char._build(ch, stream, context)
	        self.char._build(self.end_quote, stream, context)

	    def _sizeof(self, context):
	        # the length depends on the data, so there is no fixed size
	        raise SizeofError("can't calculate size")


	#===============================================================================
	# macros
	#===============================================================================
	class WhitespaceAdapter(Adapter):
	    """
	    Adapter for whitespace sequences; not meant to be used directly, see
	    Whitespace. Decoding discards the parsed whitespace (yields None) and
	    encoding always produces build_char.

	    Parameters:
	    * subcon - the subcon to adapt
	    * build_char - the character used for encoding (building)
	    """
	    __slots__ = ["build_char"]

	    def __init__(self, subcon, build_char):
	        Adapter.__init__(self, subcon)
	        self.build_char = build_char

	    def _encode(self, obj, context):
	        # the object being built is ignored; whitespace is always build_char
	        return self.build_char

	    def _decode(self, obj, context):
	        # parsed whitespace carries no value
	        return None

	def Whitespace(charset = " \t", optional = True):
	    """whitespace (space that is ignored between tokens). when building, the
	    first character of the charset is used.
	    * charset - the characters that count as whitespace. default is space
	      and tab.
	    * optional - whether or not whitespace is optional. default is True.
	    """
	    single = CharOf(None, charset)
	    if not optional:
	        repeated = GreedyRange(single)
	    else:
	        repeated = OptionalGreedyRange(single)
	    return WhitespaceAdapter(repeated, build_char = charset[0])

	def Literal(text):
	    """matches a literal string in the text
	    * text - the text (string) to match
	    """
	    # an unnamed field exactly as long as the expected text
	    raw = Field(None, len(text))
	    return ConstAdapter(raw, text)

	def Char(name):
	    """a single character (a field of length 1)
	    * name - the name of the field
	    """
	    length = 1
	    return Field(name, length)

	def CharOf(name, charset):
	    """a character that must belong to a given charset
	    * name - the name of the field
	    * charset - the set of valid characters
	    """
	    single = Char(name)
	    return OneOf(single, charset)

	def CharNoneOf(name, charset):
	    """a character that must not belong to a given charset
	    * name - the name of the field
	    * charset - the set of invalid characters
	    """
	    single = Char(name)
	    return NoneOf(single, charset)

	def Alpha(name):
	    """a single letter character (A-Z, a-z)
	    * name - the name of the field
	    """
	    lower = 'abcdefghijklmnopqrstuvwxyz'
	    upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
	    return CharOf(name, set(lower + upper))

	def Digit(name):
	    """a single decimal digit character (0-9)
	    * name - the name of the field
	    """
	    decimal_digits = set('0123456789')
	    return CharOf(name, decimal_digits)

	def AlphaDigit(name):
	    """a single alphanumeric character (A-Z, a-z, 0-9)
	    * name - the name of the field
	    """
	    digits = "0123456789"
	    lower = "abcdefghijklmnopqrstuvwxyz"
	    upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    return CharOf(name, set(digits + lower + upper))

	def BinDigit(name):
	    """a single binary digit character (0 or 1)
	    * name - the name of the field
	    """
	    binary_digits = set(('0', '1'))
	    return CharOf(name, binary_digits)

	def HexDigit(name):
	    """a single hexadecimal digit character (0-9, A-F, a-f)
	    * name - the name of the field
	    """
	    hex_digits = set('0123456789' + 'abcdef' + 'ABCDEF')
	    return CharOf(name, hex_digits)

	def Word(name):
	    """a sequence of letters (a greedy run of Alpha characters)
	    * name - the name of the field
	    """
	    letters = GreedyRange(Alpha(name))
	    return StringAdapter(letters)

	class TextualIntAdapter(Adapter):
	    """
	    Adapter for textual integers: converts between an integer and the
	    sequence of digit characters handled by the subcon.

	    Parameters:
	    * subcon - the subcon to adapt
	    * radix - the base of the integer (decimal, hexadecimal, binary, ...).
	      default is 10
	    * digits - the sequence of digits of that radix, used when encoding.
	      default is "0123456789abcdef"

	    Raises ValueError if digits has fewer characters than the radix needs.
	    """
	    __slots__ = ["radix", "digits"]
	    def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"):
	        Adapter.__init__(self, subcon)
	        if radix > len(digits):
	            raise ValueError("not enough digits for radix %d" % (radix,))
	        self.radix = radix
	        self.digits = digits
	    def _encode(self, obj, context):
	        """Return the integer obj written out in self.radix.

	        Zero is encoded as the first digit, and negative numbers get a
	        leading "-".
	        """
	        r = self.radix
	        if r < 2:
	            # divmod() below would never terminate (radix 1) or would
	            # divide by zero (radix 0)
	            raise ValueError("cannot encode with radix %d" % (r,))
	        digs = self.digits
	        if obj < 0:
	            n = -obj
	        else:
	            n = obj
	        # digits are collected least-significant first and reversed at the end
	        chars = []
	        while n > 0:
	            n, d = divmod(n, r)
	            chars.append(digs[d])
	        if not chars:
	            # the loop emits nothing for zero, which still needs one digit
	            chars.append(digs[0])
	        if obj < 0:
	            # appended last so that the sign comes first after reversing
	            chars.append("-")
	        return "".join(reversed(chars))
	    def _decode(self, obj, context):
	        """Join the parsed digit characters and convert them with int()."""
	        return int("".join(obj), self.radix)

	def DecNumber(name):
	    """decimal number (a greedy run of decimal digits)
	    * name - the name of the field
	    """
	    digit_run = GreedyRange(Digit(name))
	    return TextualIntAdapter(digit_run)

	def BinNumber(name):
	    """binary number (a greedy run of the digits 0 and 1)
	    * name - the name of the field
	    """
	    # only binary digits may be consumed: a 2-9 swallowed here would make
	    # the base-2 conversion in TextualIntAdapter fail
	    return TextualIntAdapter(GreedyRange(BinDigit(name)), 2)

	def HexNumber(name):
	    """hexadecimal number (a greedy run of the digits 0-9, a-f, A-F)
	    * name - the name of the field
	    """
	    # hex digits, not just 0-9, so that the letters a-f/A-F are accepted
	    return TextualIntAdapter(GreedyRange(HexDigit(name)), 16)

	def StringUpto(name, charset):
	    """a string that stretches up to a terminator, or EOF. unlike CString,
	    StringUpto will not consume the terminator char.
	    * name - the name of the field
	    * charset - the set of terminator characters"""
	    non_terminator = CharNoneOf(name, charset)
	    body = OptionalGreedyRange(non_terminator)
	    return StringAdapter(body)

	def Line(name):
	    r"""a textual line (everything up to, but not including, "\n")
	    * name - the name of the field
	    """
	    newline = "\n"
	    return StringUpto(name, newline)

	class IdentifierAdapter(Adapter):
	    """
	    Adapter for programmatic identifiers. Converts between the identifier
	    string and the (head, tail) pair handled by the subcon.

	    Parameters:
	    * subcon - the subcon to adapt
	    """
	    def _encode(self, obj, context):
	        # split into the leading character and the remaining characters
	        head = obj[0]
	        tail = obj[1:]
	        return head, tail

	    def _decode(self, obj, context):
	        # obj[0] is the head character, obj[1] the sequence of tail characters
	        head = obj[0]
	        tail = "".join(obj[1])
	        return head + tail

	def Identifier(name,
	               headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"),
	               tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
	    ):
	    """a programmatic identifier (symbol): one character of headset followed
	    by a (possibly empty) sequence of tailset characters
	    * name - the name of the field
	    * headset - charset for the first character. default is A-Z, a-z, and _
	    * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _
	    """
	    head = CharOf("head", headset)
	    tail = OptionalGreedyRange(CharOf("tail", tailset))
	    pair = Sequence(name, head, tail)
	    return IdentifierAdapter(pair)