| from core import * |
| from adapters import * |
| from macros import * |
| |
| |
| #=============================================================================== |
| # exceptions |
| #=============================================================================== |
class QuotedStringError(ConstructError):
    """
    Raised by QuotedString when the data cannot be represented as a quoted
    string (e.g., it contains the closing quote but escaping is disabled).
    """
    __slots__ = []
| |
| |
| #=============================================================================== |
| # constructs |
| #=============================================================================== |
class QuotedString(Construct):
    r"""
    A string delimited by quotes: it begins with an opening quote and runs
    until the first closing quote that is not preceded by the escape
    character.

    Parameters:
    * name - the name of the field
    * start_quote - the opening quote. default is '"'
    * end_quote - the closing quote character. default is None, meaning the
      same as start_quote
    * esc_char - the escape character, or None to disable escaping. default
      is "\" (backslash)
    * encoding - the character encoding (e.g., "utf8"), or None to return
      raw bytes. default is None.
    * allow_eof - whether to allow EOF before the closing quote is matched.
      if False, the FieldError raised while reading is propagated when the
      closing quote is missing. default is False.

    Example:
    QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None)
    """
    __slots__ = [
        "start_quote",
        "end_quote",
        "char",
        "esc_char",
        "encoding",
        "allow_eof",
    ]

    def __init__(self, name, start_quote = '"', end_quote = None,
                 esc_char = '\\', encoding = None, allow_eof = False):
        Construct.__init__(self, name)
        # a missing closing quote means "same as the opening quote"
        if end_quote is None:
            end_quote = start_quote
        self.encoding = encoding
        self.allow_eof = allow_eof
        self.esc_char = esc_char
        self.end_quote = end_quote
        # sub-constructs: one matches the opening quote, one reads/writes
        # a single character at a time
        self.start_quote = Literal(start_quote)
        self.char = Char("char")

    def _parse(self, stream, context):
        """Consume the opening quote, then collect characters up to the
        first unescaped closing quote. Escape characters themselves are
        dropped; the character following an escape is kept verbatim."""
        self.start_quote._parse(stream, context)
        collected = []
        escaped = False
        try:
            while True:
                ch = self.char._parse(stream, context)
                if escaped:
                    # whatever follows an escape char is taken literally
                    collected.append(ch)
                    escaped = False
                elif ch == self.esc_char:
                    escaped = True
                elif ch == self.end_quote:
                    break
                else:
                    collected.append(ch)
        except FieldError:
            # reading failed before the closing quote was seen; tolerated
            # only when allow_eof is set
            if not self.allow_eof:
                raise
        result = "".join(collected)
        if self.encoding is None:
            return result
        return result.decode(self.encoding)

    def _build(self, obj, stream, context):
        """Write the opening quote, the (optionally encoded) text with the
        escape and closing-quote characters escaped, and the closing quote.
        Raises QuotedStringError if the text contains the closing quote
        while escaping is disabled."""
        self.start_quote._build(None, stream, context)
        if self.encoding:
            obj = obj.encode(self.encoding)
        for ch in obj:
            if ch == self.esc_char:
                needs_escape = True
            elif ch == self.end_quote:
                if self.esc_char is None:
                    raise QuotedStringError("found ending quote in data, "
                        "but no escape char defined", ch)
                needs_escape = True
            else:
                needs_escape = False
            if needs_escape:
                self.char._build(self.esc_char, stream, context)
            self.char._build(ch, stream, context)
        self.char._build(self.end_quote, stream, context)

    def _sizeof(self, context):
        """The size depends on the data, so it is never computable."""
        raise SizeofError("can't calculate size")
| |
| |
| #=============================================================================== |
| # macros |
| #=============================================================================== |
class WhitespaceAdapter(Adapter):
    """
    Adapter for runs of whitespace; not meant to be used directly.
    See Whitespace.

    Parameters:
    * subcon - the subcon to adapt
    * build_char - the value handed to the subcon when building
    """
    __slots__ = ["build_char"]

    def __init__(self, subcon, build_char):
        Adapter.__init__(self, subcon)
        self.build_char = build_char

    def _encode(self, obj, context):
        # the object being built is ignored; build_char is always emitted
        return self.build_char

    def _decode(self, obj, context):
        # parsed whitespace carries no information
        return None
| |
def Whitespace(charset = " \t", optional = True):
    """whitespace (space that is ignored between tokens). when building, the
    first character of the charset is emitted.
    * charset - the set of characters that are considered whitespace. default
      is space and tab.
    * optional - whether or not whitespace is optional. default is True.
    """
    # pick the repeater first, then wrap the single-character matcher in it
    if optional:
        repeater = OptionalGreedyRange
    else:
        repeater = GreedyRange
    chars = repeater(CharOf(None, charset))
    return WhitespaceAdapter(chars, build_char = charset[0])
| |
def Literal(text):
    """matches a literal string in the text (an anonymous field of
    len(text) bytes wrapped in a ConstAdapter for that text)
    * text - the text (string) to match
    """
    raw = Field(None, len(text))
    return ConstAdapter(raw, text)
| |
def Char(name):
    """a single one-byte character (a Field of length 1)
    * name - the name of the field
    """
    length = 1
    return Field(name, length)
| |
def CharOf(name, charset):
    """a single character that must belong to the given charset
    * name - the name of the field
    * charset - the set of valid characters
    """
    single = Char(name)
    return OneOf(single, charset)
| |
def CharNoneOf(name, charset):
    """a single character that must NOT belong to the given charset
    * name - the name of the field
    * charset - the set of invalid characters
    """
    single = Char(name)
    return NoneOf(single, charset)
| |
def Alpha(name):
    """a single letter character (A-Z, a-z)
    * name - the name of the field
    """
    letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    return CharOf(name, letters)
| |
def Digit(name):
    """a single decimal digit character (0-9)
    * name - the name of the field
    """
    decimal_digits = set('0123456789')
    return CharOf(name, decimal_digits)
| |
def AlphaDigit(name):
    """a single alphanumeric character (A-Z, a-z, 0-9)
    * name - the name of the field
    """
    alphanumerics = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
    return CharOf(name, alphanumerics)
| |
def BinDigit(name):
    """a single binary digit character (0-1)
    * name - the name of the field
    """
    binary_digits = set('01')
    return CharOf(name, binary_digits)
| |
def HexDigit(name):
    """a single hexadecimal digit character (0-9, A-F, a-f)
    * name - the name of the field
    """
    hex_digits = set('0123456789abcdefABCDEF')
    return CharOf(name, hex_digits)
| |
def Word(name):
    """a sequence of letters, joined into a single string
    * name - the name of the field
    """
    letters = GreedyRange(Alpha(name))
    return StringAdapter(letters)
| |
class TextualIntAdapter(Adapter):
    """
    Adapter for textual integers: converts between an integer and the
    sequence of digit characters that spells it in the given radix.

    Parameters:
    * subcon - the subcon to adapt
    * radix - the base of the integer (decimal, hexadecimal, binary, ...).
      default is 10
    * digits - the sequence of digits of that radix, used when building;
      digits[i] is the character for the value i. default is
      "0123456789abcdef"

    Raises ValueError if there are fewer digits than the radix requires.
    """
    __slots__ = ["radix", "digits"]

    def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"):
        Adapter.__init__(self, subcon)
        if radix > len(digits):
            raise ValueError("not enough digits for radix %d" % (radix,))
        self.radix = radix
        self.digits = digits

    def _encode(self, obj, context):
        """Return the textual form of the integer obj in self.radix, with a
        leading "-" for negative values and the zero digit for 0."""
        digs = self.digits
        if obj == 0:
            # the digit loop below emits nothing for zero, which would
            # yield an empty string; spell zero explicitly
            return digs[0]
        if obj < 0:
            n = -obj
        else:
            n = obj
        r = self.radix
        # digits are produced least-significant first and reversed at the end
        chars = []
        while n > 0:
            n, d = divmod(n, r)
            chars.append(digs[d])
        if obj < 0:
            # appended last so that it ends up first after the reversal
            chars.append("-")
        return "".join(reversed(chars))

    def _decode(self, obj, context):
        """Join the parsed characters and convert them to an integer using
        the builtin int() with self.radix."""
        return int("".join(obj), self.radix)
| |
def DecNumber(name):
    """decimal number: a greedy run of decimal digits converted to an
    integer (radix 10)
    * name - the name of the field
    """
    digit_run = GreedyRange(Digit(name))
    return TextualIntAdapter(digit_run)
| |
def BinNumber(name):
    """binary number: a greedy run of binary digits (0-1) converted to an
    integer (radix 2)
    * name - the name of the field
    """
    # match only binary digits; matching any decimal digit would swallow
    # characters such as "2"-"9" and then fail the base-2 conversion
    return TextualIntAdapter(GreedyRange(BinDigit(name)), 2)
| |
def HexNumber(name):
    """hexadecimal number: a greedy run of hexadecimal digits (0-9, A-F,
    a-f) converted to an integer (radix 16)
    * name - the name of the field
    """
    # match hexadecimal digits; matching only decimal digits would stop at
    # the first a-f/A-F character and build would reject such digits
    return TextualIntAdapter(GreedyRange(HexDigit(name)), 16)
| |
def StringUpto(name, charset):
    """a string that stretches up to a terminator, or EOF. unlike CString,
    StringUpto will not consume the terminator char.
    * name - the name of the field
    * charset - the set of terminator characters"""
    non_terminator = CharNoneOf(name, charset)
    body = OptionalGreedyRange(non_terminator)
    return StringAdapter(body)
| |
def Line(name):
    r"""a textual line: everything up to (but not including) "\n"
    * name - the name of the field
    """
    newline = "\n"
    return StringUpto(name, newline)
| |
class IdentifierAdapter(Adapter):
    """
    Adapter for programmatic identifiers: converts between an identifier
    string and a (first character, remaining characters) pair.

    Parameters:
    * subcon - the subcon to adapt
    """
    def _encode(self, obj, context):
        # split the identifier into its head character and its tail
        head = obj[0]
        tail = obj[1:]
        return head, tail

    def _decode(self, obj, context):
        # obj[0] is the head character, obj[1] the sequence of tail chars
        head = obj[0]
        tail = obj[1]
        return head + "".join(tail)
| |
def Identifier(name,
        headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"),
        tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
    ):
    """a programmatic identifier (symbol): one character of headset followed
    by a (possibly empty) sequence of tailset characters
    * name - the name of the field
    * headset - charset for the first character. default is A-Z, a-z, and _
    * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _
    """
    head = CharOf("head", headset)
    tail = OptionalGreedyRange(CharOf("tail", tailset))
    parts = Sequence(name, head, tail)
    return IdentifierAdapter(parts)
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |