| From ce60712292e5734c44700eba16c0f3af5c298390 Mon Sep 17 00:00:00 2001 |
| From: Jean Abou Samra <jean@abou-samra.fr> |
| Date: Wed, 5 Apr 2023 23:21:48 +0200 |
| Subject: [PATCH] Improve Java properties lexer |
| |
| Use special lexer rules for escapes; this fixes catastrophic backtracking |
| and also highlights the escape sequences themselves. |
| |
| Fixes #2356 |
| --- |
| pygments/lexers/configs.py | 52 ++++++--- |
| tests/examplefiles/properties/java.properties | 13 ++- |
| .../properties/java.properties.output | 110 +++++++++++++++--- |
| .../test_escaped_space_in_value.txt | 4 +- |
| .../properties/test_just_key_with_space.txt | 4 +- |
| 5 files changed, 145 insertions(+), 38 deletions(-) |
| |
| diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py |
| index 3b1f9b4da5..1784215091 100644 |
| --- a/pygments/lexers/configs.py |
| +++ b/pygments/lexers/configs.py |
| @@ -129,26 +129,42 @@ class PropertiesLexer(RegexLexer): |
| |
| tokens = { |
| 'root': [ |
| - (r'\s+', Whitespace), |
| + # comments |
| (r'[!#].*|/{2}.*', Comment.Single), |
| - # search for first separator |
| - (r'([^\\\n]|\\.)*?(?=[ \f\t=:])', Name.Attribute, "separator"), |
| - # empty key |
| - (r'.+?$', Name.Attribute), |
| + # ending a comment or whitespace-only line |
| + (r'\n', Whitespace), |
| + # eat whitespace at the beginning of a line |
| + (r'^[^\S\n]+', Whitespace), |
| + # start lexing a key |
| + default('key'), |
| ], |
| - 'separator': [ |
| - # search for line continuation escape |
| - (r'([ \f\t]*)([=:]*)([ \f\t]*)(.*(?<!\\)(?:\\{2})*)(\\)(?!\\)$', |
| - bygroups(Whitespace, Operator, Whitespace, String, Text), "value", "#pop"), |
| - (r'([ \f\t]*)([=:]*)([ \f\t]*)(.*)', |
| - bygroups(Whitespace, Operator, Whitespace, String), "#pop"), |
| + 'key': [ |
| + # non-escaped key characters |
| + (r'[^\\:=\s]+', Name.Attribute), |
| + # escapes |
| + include('escapes'), |
| + # separator is the first non-escaped whitespace or colon or '=' on the line; |
| + # if it's whitespace, = and : are gobbled after it |
| + (r'([^\S\n]*)([:=])([^\S\n]*)', |
| + bygroups(Whitespace, Operator, Whitespace), |
| + ('#pop', 'value')), |
| + (r'[^\S\n]+', Whitespace, ('#pop', 'value')), |
| + # maybe we got no value after all |
| + (r'\n', Whitespace, '#pop'), |
| ], |
| - 'value': [ # line continuation |
| - (r'\s+', Whitespace), |
| - # search for line continuation escape |
| - (r'(\s*)(.*(?<!\\)(?:\\{2})*)(\\)(?!\\)([ \t]*)', |
| - bygroups(Whitespace, String, Text, Whitespace)), |
| - (r'.*$', String, "#pop"), |
| + 'value': [ |
| + # non-escaped value characters |
| + (r'[^\\\n]+', String), |
| + # escapes |
| + include('escapes'), |
| + # end the value on an unescaped newline |
| + (r'\n', Whitespace, '#pop'), |
| + ], |
| + 'escapes': [ |
| + # line continuations; these gobble whitespace at the beginning of the next line |
| + (r'(\\\n)([^\S\n]*)', bygroups(String.Escape, Whitespace)), |
| + # other escapes |
| + (r'\\(.|\n)', String.Escape), |
| ], |
| } |
| |
| @@ -1154,7 +1170,7 @@ class UnixConfigLexer(RegexLexer): |
| * ``/etc/group`` |
| * ``/etc/passwd`` |
| * ``/etc/shadow`` |
| - |
| + |
| .. versionadded:: 2.12 |
| """ |
| |
| diff --git a/tests/examplefiles/properties/java.properties b/tests/examplefiles/properties/java.properties |
| index d5b594e3d8..bdd6b76acb 100644 |
| --- a/tests/examplefiles/properties/java.properties |
| +++ b/tests/examplefiles/properties/java.properties |
| @@ -4,7 +4,7 @@ |
| Truth:Beauty |
| Truth Beauty |
| Truth :Beauty |
| - |
| + |
| ! line continuations and escapes |
| fruits apple, banana, pear, \ |
| cantaloupe, watermelon, \ |
| @@ -14,6 +14,8 @@ key = \ |
| and value2\\ |
| key\ 2 = value |
| key\\ 3 = value3 |
| +key \ |
| + = value |
| |
| ! empty keys and edge cases |
| key1 = |
| @@ -22,3 +24,12 @@ key3 the value3 |
| key4 the:value4 |
| key5 the=value5 |
| key6=the value6 |
| + |
| +! escapes in keys |
| +key\ with\ spaces = value |
| +key\nwith\nnewlines = value\nwith\nnewlines |
| + |
| + ! indented comment |
| + |
| +! line continuations do \ |
| +not = work for comments |
| diff --git a/tests/examplefiles/properties/java.properties.output b/tests/examplefiles/properties/java.properties.output |
| index 0c1fdeebf7..482257566b 100644 |
| --- a/tests/examplefiles/properties/java.properties.output |
| +++ b/tests/examplefiles/properties/java.properties.output |
| @@ -2,13 +2,17 @@ |
| '\n' Text.Whitespace |
| |
| '# mixing spaces' Comment.Single |
| -'\n\t' Text.Whitespace |
| +'\n' Text.Whitespace |
| + |
| +'\t' Text.Whitespace |
| 'Truth' Name.Attribute |
| ' ' Text.Whitespace |
| '=' Operator |
| ' ' Text.Whitespace |
| 'Beauty' Literal.String |
| -'\n ' Text.Whitespace |
| +'\n' Text.Whitespace |
| + |
| +' ' Text.Whitespace |
| 'Truth' Name.Attribute |
| ':' Operator |
| 'Beauty' Literal.String |
| @@ -23,18 +27,24 @@ |
| ' ' Text.Whitespace |
| ':' Operator |
| 'Beauty' Literal.String |
| -'\n \n' Text.Whitespace |
| +'\n' Text.Whitespace |
| + |
| +'\n' Text.Whitespace |
| |
| '! line continuations and escapes' Comment.Single |
| -'\n ' Text.Whitespace |
| +'\n' Text.Whitespace |
| + |
| +' ' Text.Whitespace |
| 'fruits' Name.Attribute |
| ' ' Text.Whitespace |
| 'apple, banana, pear, ' Literal.String |
| -'\\' Text |
| -'\n ' Text.Whitespace |
| +'\\\n' Literal.String.Escape |
| + |
| +' ' Text.Whitespace |
| 'cantaloupe, watermelon, ' Literal.String |
| -'\\' Text |
| -'\n ' Text.Whitespace |
| +'\\\n' Literal.String.Escape |
| + |
| +' ' Text.Whitespace |
| 'kiwi, mango' Literal.String |
| '\n' Text.Whitespace |
| |
| @@ -42,25 +52,42 @@ |
| ' ' Text.Whitespace |
| '=' Operator |
| ' ' Text.Whitespace |
| -'\\' Text |
| -'\n ' Text.Whitespace |
| -'value1 \\\\' Literal.String |
| -'\\' Text |
| -'\n ' Text.Whitespace |
| -'and value2\\\\' Literal.String |
| +'\\\n' Literal.String.Escape |
| + |
| +' ' Text.Whitespace |
| +'value1 ' Literal.String |
| +'\\\\' Literal.String.Escape |
| +'\\\n' Literal.String.Escape |
| + |
| +' ' Text.Whitespace |
| +'and value2' Literal.String |
| +'\\\\' Literal.String.Escape |
| '\n' Text.Whitespace |
| |
| -'key\\ 2' Name.Attribute |
| +'key' Name.Attribute |
| +'\\ ' Literal.String.Escape |
| +'2' Name.Attribute |
| ' ' Text.Whitespace |
| '=' Operator |
| ' ' Text.Whitespace |
| 'value' Literal.String |
| '\n' Text.Whitespace |
| |
| -'key\\\\' Name.Attribute |
| +'key' Name.Attribute |
| +'\\\\' Literal.String.Escape |
| ' ' Text.Whitespace |
| '3 = value3' Literal.String |
| -'\n\n' Text.Whitespace |
| +'\n' Text.Whitespace |
| + |
| +'key' Name.Attribute |
| +' ' Text.Whitespace |
| +'\\\n' Literal.String.Escape |
| + |
| +' ' Text.Whitespace |
| +'= value' Literal.String |
| +'\n' Text.Whitespace |
| + |
| +'\n' Text.Whitespace |
| |
| '! empty keys and edge cases' Comment.Single |
| '\n' Text.Whitespace |
| @@ -92,3 +119,52 @@ |
| '=' Operator |
| 'the value6' Literal.String |
| '\n' Text.Whitespace |
| + |
| +'\n' Text.Whitespace |
| + |
| +'! escapes in keys' Comment.Single |
| +'\n' Text.Whitespace |
| + |
| +'key' Name.Attribute |
| +'\\ ' Literal.String.Escape |
| +'with' Name.Attribute |
| +'\\ ' Literal.String.Escape |
| +'spaces' Name.Attribute |
| +' ' Text.Whitespace |
| +'=' Operator |
| +' ' Text.Whitespace |
| +'value' Literal.String |
| +'\n' Text.Whitespace |
| + |
| +'key' Name.Attribute |
| +'\\n' Literal.String.Escape |
| +'with' Name.Attribute |
| +'\\n' Literal.String.Escape |
| +'newlines' Name.Attribute |
| +' ' Text.Whitespace |
| +'=' Operator |
| +' ' Text.Whitespace |
| +'value' Literal.String |
| +'\\n' Literal.String.Escape |
| +'with' Literal.String |
| +'\\n' Literal.String.Escape |
| +'newlines' Literal.String |
| +'\n' Text.Whitespace |
| + |
| +'\n' Text.Whitespace |
| + |
| +' ' Text.Whitespace |
| +'! indented comment' Comment.Single |
| +'\n' Text.Whitespace |
| + |
| +'\n' Text.Whitespace |
| + |
| +'! line continuations do \\' Comment.Single |
| +'\n' Text.Whitespace |
| + |
| +'not' Name.Attribute |
| +' ' Text.Whitespace |
| +'=' Operator |
| +' ' Text.Whitespace |
| +'work for comments' Literal.String |
| +'\n' Text.Whitespace |
| diff --git a/tests/snippets/properties/test_escaped_space_in_value.txt b/tests/snippets/properties/test_escaped_space_in_value.txt |
| index f76507f4ec..44772d8c77 100644 |
| --- a/tests/snippets/properties/test_escaped_space_in_value.txt |
| +++ b/tests/snippets/properties/test_escaped_space_in_value.txt |
| @@ -6,5 +6,7 @@ key = doubleword\ value |
| ' ' Text.Whitespace |
| '=' Operator |
| ' ' Text.Whitespace |
| -'doubleword\\ value' Literal.String |
| +'doubleword' Literal.String |
| +'\\ ' Literal.String.Escape |
| +'value' Literal.String |
| '\n' Text.Whitespace |
| diff --git a/tests/snippets/properties/test_just_key_with_space.txt b/tests/snippets/properties/test_just_key_with_space.txt |
| index 660c37ca03..833fe4055e 100644 |
| --- a/tests/snippets/properties/test_just_key_with_space.txt |
| +++ b/tests/snippets/properties/test_just_key_with_space.txt |
| @@ -2,5 +2,7 @@ |
| just\ key |
| |
| ---tokens--- |
| -'just\\ key' Name.Attribute |
| +'just' Name.Attribute |
| +'\\ ' Literal.String.Escape |
| +'key' Name.Attribute |
| '\n' Text.Whitespace |