blob: e461dffc63b2ee74294921dce440a39e46df128d [file] [log] [blame]
from core import *
from adapters import *
from macros import *
#===============================================================================
# exceptions
#===============================================================================
class QuotedStringError(ConstructError):
    """
    Raised by QuotedString when the data being built contains the closing
    quote but no escape character has been configured.
    """
    __slots__ = []
#===============================================================================
# constructs
#===============================================================================
class QuotedString(Construct):
    r"""
    A quoted string: it starts with an opening quote and runs until the first
    closing quote that is not preceded by the escape character.

    Parameters:
    * name - the name of the field
    * start_quote - the opening quote. default is '"'
    * end_quote - the closing quote character, or None to reuse start_quote.
      default is None
    * esc_char - the escape character, or None to disable escaping. default
      is "\" (backslash)
    * encoding - the character encoding (e.g., "utf8"), or None to return
      raw bytes. default is None.
    * allow_eof - whether to allow EOF before the closing quote is matched.
      if False, the error raised on EOF is propagated when the closing quote
      is missing. default is False.

    Example:
    QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None)
    """
    __slots__ = [
        "start_quote", "end_quote", "char", "esc_char", "encoding",
        "allow_eof"
    ]

    def __init__(self, name, start_quote = '"', end_quote = None,
                 esc_char = '\\', encoding = None, allow_eof = False):
        Construct.__init__(self, name)
        # a missing closing quote means "same as the opening quote"
        if end_quote is None:
            end_quote = start_quote
        self.allow_eof = allow_eof
        self.encoding = encoding
        self.esc_char = esc_char
        self.end_quote = end_quote
        # the opening quote is matched as a literal; the payload is read
        # one character at a time
        self.char = Char("char")
        self.start_quote = Literal(start_quote)

    def _parse(self, stream, context):
        """Consume the opening quote, then collect characters up to the
        first unescaped closing quote (or EOF, when allow_eof is set)."""
        self.start_quote._parse(stream, context)
        collected = []
        after_escape = False
        try:
            while True:
                ch = self.char._parse(stream, context)
                if after_escape:
                    # whatever follows the escape char is taken literally
                    collected.append(ch)
                    after_escape = False
                elif ch == self.esc_char:
                    after_escape = True
                elif ch == self.end_quote:
                    break
                else:
                    collected.append(ch)
        except FieldError:
            # reading a character failed (EOF before the closing quote)
            if not self.allow_eof:
                raise
        result = "".join(collected)
        if self.encoding is None:
            return result
        return result.decode(self.encoding)

    def _build(self, obj, stream, context):
        """Write the opening quote, the payload (escaping the escape char
        and the closing quote), and finally the closing quote."""
        self.start_quote._build(None, stream, context)
        payload = obj
        if self.encoding:
            payload = payload.encode(self.encoding)
        for ch in payload:
            if ch == self.esc_char:
                must_escape = True
            elif ch == self.end_quote:
                # a closing quote inside the data can only be represented
                # when an escape character is available
                if self.esc_char is None:
                    raise QuotedStringError("found ending quote in data, "
                        "but no escape char defined", ch)
                must_escape = True
            else:
                must_escape = False
            if must_escape:
                self.char._build(self.esc_char, stream, context)
            self.char._build(ch, stream, context)
        self.char._build(self.end_quote, stream, context)

    def _sizeof(self, context):
        """The length depends on the data, so it cannot be computed."""
        raise SizeofError("can't calculate size")
#===============================================================================
# macros
#===============================================================================
class WhitespaceAdapter(Adapter):
    """
    Adapter for whitespace sequences; not meant to be used directly.
    See Whitespace.

    Parameters:
    * subcon - the subcon to adapt
    * build_char - the value handed to the subcon when building (encoding)
    """
    __slots__ = ["build_char"]

    def __init__(self, subcon, build_char):
        Adapter.__init__(self, subcon)
        self.build_char = build_char

    def _decode(self, obj, context):
        # the parsed whitespace carries no information; discard it
        return None

    def _encode(self, obj, context):
        # the object being built is ignored; build_char is always emitted
        return self.build_char
def Whitespace(charset = " \t", optional = True):
    """whitespace (space that is ignored between tokens). when building, the
    first character of the charset is used.
    * charset - the characters that count as whitespace. default is space
      and tab.
    * optional - whether the whitespace may be absent. default is True.
    """
    single = CharOf(None, charset)
    # pick the repeater according to whether whitespace is mandatory
    if optional:
        repeated = OptionalGreedyRange(single)
    else:
        repeated = GreedyRange(single)
    return WhitespaceAdapter(repeated, build_char = charset[0])
def Literal(text):
    """matches a literal string in the text
    * text - the text (string) that must appear
    """
    # a fixed-size, unnamed field as long as the literal, pinned to `text`
    raw = Field(None, len(text))
    return ConstAdapter(raw, text)
def Char(name):
    """a one-byte character
    * name - the name of the field
    """
    char_size = 1
    return Field(name, char_size)
def CharOf(name, charset):
    """a single character that must belong to the given charset
    * name - the name of the field
    * charset - the set of valid characters
    """
    single = Char(name)
    return OneOf(single, charset)
def CharNoneOf(name, charset):
    """a single character that must not belong to the given charset
    * name - the name of the field
    * charset - the set of invalid characters
    """
    single = Char(name)
    return NoneOf(single, charset)
def Alpha(name):
    """a letter character (A-Z, a-z)
    * name - the name of the field
    """
    lowercase = "abcdefghijklmnopqrstuvwxyz"
    uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    return CharOf(name, set(lowercase + uppercase))
def Digit(name):
    """a digit character (0-9)
    * name - the name of the field
    """
    decimal_digits = set("0123456789")
    return CharOf(name, decimal_digits)
def AlphaDigit(name):
    """an alphanumeric character (A-Z, a-z, 0-9)
    * name - the name of the field
    """
    digits = "0123456789"
    lowercase = "abcdefghijklmnopqrstuvwxyz"
    uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    return CharOf(name, set(digits + lowercase + uppercase))
def BinDigit(name):
    """a binary digit (0-1)
    * name - the name of the field
    """
    binary_digits = set("01")
    return CharOf(name, binary_digits)
def HexDigit(name):
    """a hexadecimal digit (0-9, A-F, a-f)
    * name - the name of the field
    """
    decimal = "0123456789"
    lower_hex = "abcdef"
    upper_hex = "ABCDEF"
    return CharOf(name, set(decimal + lower_hex + upper_hex))
def Word(name):
    """a sequence of letters (A-Z, a-z)
    * name - the name of the field
    """
    letters = GreedyRange(Alpha(name))
    return StringAdapter(letters)
class TextualIntAdapter(Adapter):
    """
    Adapter for textual integers

    Parameters:
    * subcon - the subcon to adapt
    * radix - the base of the integer (decimal, hexadecimal, binary, ...).
      default is 10
    * digits - the sequence of digits of that radix; digits[0] stands for
      zero. default is "0123456789abcdef"

    Raises ValueError if `digits` holds fewer symbols than `radix` requires.
    """
    __slots__ = ["radix", "digits"]

    def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"):
        Adapter.__init__(self, subcon)
        if radix > len(digits):
            raise ValueError("not enough digits for radix %d" % (radix,))
        self.radix = radix
        self.digits = digits

    def _encode(self, obj, context):
        """Render the integer `obj` as text in the configured radix.

        Zero is rendered as digits[0] and negative numbers carry a leading
        "-".
        """
        radix = self.radix
        digits = self.digits
        if obj < 0:
            remaining = -obj
        else:
            remaining = obj
        # digits are collected least-significant first and reversed at the end
        chars = []
        while remaining > 0:
            remaining, d = divmod(remaining, radix)
            chars.append(digits[d])
        if not chars:
            # zero never enters the loop above; emit a single zero digit
            # instead of an empty string
            chars.append(digits[0])
        if obj < 0:
            # appended last so that it ends up first after the reversal
            chars.append("-")
        return "".join(reversed(chars))

    def _decode(self, obj, context):
        """Join the parsed characters and convert them with int(text, radix)."""
        return int("".join(obj), self.radix)
def DecNumber(name):
    """decimal number
    * name - the name of the field
    """
    decimal_digits = GreedyRange(Digit(name))
    return TextualIntAdapter(decimal_digits)
def BinNumber(name):
    """binary number
    * name - the name of the field
    """
    # only 0 and 1 are valid here; matching any decimal digit would let
    # characters such as "7" through, which int(text, 2) then rejects
    return TextualIntAdapter(GreedyRange(BinDigit(name)), 2)
def HexNumber(name):
    """hexadecimal number
    * name - the name of the field
    """
    # hexadecimal digits include a-f / A-F; matching only 0-9 would stop at
    # (or refuse to build) the first letter digit
    return TextualIntAdapter(GreedyRange(HexDigit(name)), 16)
def StringUpto(name, charset):
    """a string that stretches up to a terminator, or EOF. unlike CString,
    StringUpto will not consume the terminator char.
    * name - the name of the field
    * charset - the set of terminator characters
    """
    non_terminator = CharNoneOf(name, charset)
    body = OptionalGreedyRange(non_terminator)
    return StringAdapter(body)
def Line(name):
    r"""a textual line (up to "\n")
    * name - the name of the field
    """
    newline = "\n"
    return StringUpto(name, newline)
class IdentifierAdapter(Adapter):
    """
    Adapter for programmatic identifiers; converts between an identifier
    string and a (head, tail) pair.

    Parameters:
    * subcon - the subcon to adapt
    """
    def _encode(self, obj, context):
        # split into the first character and everything after it
        head = obj[0]
        tail = obj[1:]
        return head, tail

    def _decode(self, obj, context):
        # obj[0] is the head character, obj[1] the sequence of tail characters
        head = obj[0]
        tail = obj[1]
        return head + "".join(tail)
def Identifier(name,
        headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"),
        tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
    ):
    """a programmatic identifier (symbol). the first character must come from
    headset; it is followed by a (possibly empty) run of tailset characters
    * name - the name of the field
    * headset - charset for the first character. default is A-Z, a-z, and _
    * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _
    """
    head = CharOf("head", headset)
    tail = OptionalGreedyRange(CharOf("tail", tailset))
    return IdentifierAdapter(Sequence(name, head, tail))