| From 5c17dfc5d70ce88be99bc5769b91ce79d7a90d61 Mon Sep 17 00:00:00 2001 |
| From: Senthil Kumaran <senthil@uthcode.com> |
| Date: Mon, 15 Feb 2021 11:16:43 -0800 |
| Subject: [PATCH] [3.6] bpo-42967: only use '&' as a query string separator |
| (GH-24297) (GH-24532) |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| bpo-42967: [security] Address a web cache-poisoning issue reported in |
| urllib.parse.parse_qsl(). |
| |
| urllib.parse will only use "&" as query string separator by default |
| instead of both ";" and "&" as allowed in earlier versions. An optional |
| argument separator with default value "&" is added to specify the |
| separator. |
| |
| Co-authored-by: Éric Araujo <merwok@netwok.org> |
| Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> |
| Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com> |
| --- |
| Doc/library/cgi.rst | 8 ++- |
| Doc/library/urllib.parse.rst | 22 +++++- |
| Doc/whatsnew/3.6.rst | 13 ++++ |
| Lib/cgi.py | 17 +++-- |
| Lib/test/test_cgi.py | 29 ++++++-- |
| Lib/test/test_urlparse.py | 68 +++++++++++++------ |
| Lib/urllib/parse.py | 19 ++++-- |
| .../2021-02-14-15-59-16.bpo-42967.YApqDS.rst | 1 + |
| 8 files changed, 134 insertions(+), 43 deletions(-) |
| create mode 100644 Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst |
| |
| diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst |
| index 41219eeaab..a3dad1b8b0 100644 |
| --- a/Doc/library/cgi.rst |
| +++ b/Doc/library/cgi.rst |
| @@ -277,13 +277,12 @@ These are useful if you want more control, or if you want to employ some of the |
| algorithms implemented in this module in other circumstances. |
| |
| |
| -.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False) |
| +.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator="&") |
| |
| Parse a query in the environment or from a file (the file defaults to |
| - ``sys.stdin``). The *keep_blank_values* and *strict_parsing* parameters are |
| + ``sys.stdin``). The *keep_blank_values*, *strict_parsing* and *separator* parameters are |
| passed to :func:`urllib.parse.parse_qs` unchanged. |
| |
| - |
| .. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False) |
| |
| This function is deprecated in this module. Use :func:`urllib.parse.parse_qs` |
| @@ -308,6 +307,9 @@ algorithms implemented in this module in other circumstances. |
| Note that this does not parse nested multipart parts --- use |
| :class:`FieldStorage` for that. |
| |
| + .. versionchanged:: 3.6.13 |
| + Added the *separator* parameter. |
| + |
| |
| .. function:: parse_header(string) |
| |
| diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst |
| index 647af613a3..3c2e37ef20 100644 |
| --- a/Doc/library/urllib.parse.rst |
| +++ b/Doc/library/urllib.parse.rst |
| @@ -143,7 +143,7 @@ or on combining URL components into a URL string. |
| now raise :exc:`ValueError`. |
| |
| |
| -.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None) |
| +.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&') |
| |
| Parse a query string given as a string argument (data of type |
| :mimetype:`application/x-www-form-urlencoded`). Data are returned as a |
| @@ -168,6 +168,9 @@ or on combining URL components into a URL string. |
| read. If set, then throws a :exc:`ValueError` if there are more than |
| *max_num_fields* fields read. |
| |
| + The optional argument *separator* is the symbol to use for separating the |
| + query arguments. It defaults to ``&``. |
| + |
| Use the :func:`urllib.parse.urlencode` function (with the ``doseq`` |
| parameter set to ``True``) to convert such dictionaries into query |
| strings. |
| @@ -179,8 +182,14 @@ or on combining URL components into a URL string. |
| .. versionchanged:: 3.6.8 |
| Added *max_num_fields* parameter. |
| |
| + .. versionchanged:: 3.6.13 |
| + Added *separator* parameter with the default value of ``&``. Python |
| + versions earlier than Python 3.6.13 allowed using both ``;`` and ``&`` as |
| + query parameter separators. This has been changed to allow only a single |
| + separator key, with ``&`` as the default separator. |
| + |
| |
| -.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None) |
| +.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&') |
| |
| Parse a query string given as a string argument (data of type |
| :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of |
| @@ -204,6 +213,9 @@ or on combining URL components into a URL string. |
| read. If set, then throws a :exc:`ValueError` if there are more than |
| *max_num_fields* fields read. |
| |
| + The optional argument *separator* is the symbol to use for separating the |
| + query arguments. It defaults to ``&``. |
| + |
| Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into |
| query strings. |
| |
| @@ -213,6 +225,12 @@ or on combining URL components into a URL string. |
| .. versionchanged:: 3.6.8 |
| Added *max_num_fields* parameter. |
| |
| + .. versionchanged:: 3.6.13 |
| + Added *separator* parameter with the default value of ``&``. Python |
| + versions earlier than Python 3.6.13 allowed using both ``;`` and ``&`` as |
| + query parameter separators. This has been changed to allow only a single |
| + separator key, with ``&`` as the default separator. |
| + |
| |
| .. function:: urlunparse(parts) |
| |
| diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst |
| index 90250e46b6..296935adad 100644 |
| --- a/Doc/whatsnew/3.6.rst |
| +++ b/Doc/whatsnew/3.6.rst |
| @@ -2459,3 +2459,16 @@ because of the behavior of the socket option ``SO_REUSEADDR`` in UDP. For more |
| details, see the documentation for ``loop.create_datagram_endpoint()``. |
| (Contributed by Kyle Stanley, Antoine Pitrou, and Yury Selivanov in |
| :issue:`37228`.) |
| + |
| +Notable changes in Python 3.6.13 |
| +================================ |
| + |
| +Earlier Python versions allowed using both ``;`` and ``&`` as |
| +query parameter separators in :func:`urllib.parse.parse_qs` and |
| +:func:`urllib.parse.parse_qsl`. Due to security concerns, and to conform with |
| +newer W3C recommendations, this has been changed to allow only a single |
| +separator key, with ``&`` as the default. This change also affects |
| +:func:`cgi.parse` and :func:`cgi.parse_multipart` as they use the affected |
| +functions internally. For more details, please see their respective |
| +documentation. |
| +(Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.) |
| diff --git a/Lib/cgi.py b/Lib/cgi.py |
| index 56f243e09f..1483bedbd5 100755 |
| --- a/Lib/cgi.py |
| +++ b/Lib/cgi.py |
| @@ -117,7 +117,8 @@ log = initlog # The current logging function |
| # 0 ==> unlimited input |
| maxlen = 0 |
| |
| -def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): |
| +def parse(fp=None, environ=os.environ, keep_blank_values=0, |
| + strict_parsing=0, separator='&'): |
| """Parse a query in the environment or from a file (default stdin) |
| |
| Arguments, all optional: |
| @@ -136,6 +137,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): |
| strict_parsing: flag indicating what to do with parsing errors. |
| If false (the default), errors are silently ignored. |
| If true, errors raise a ValueError exception. |
| + |
| + separator: str. The symbol to use for separating the query arguments. |
| + Defaults to &. |
| """ |
| if fp is None: |
| fp = sys.stdin |
| @@ -180,7 +184,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): |
| qs = "" |
| environ['QUERY_STRING'] = qs # XXX Shouldn't, really |
| return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing, |
| - encoding=encoding) |
| + encoding=encoding, separator=separator) |
| |
| |
| # parse query string function called from urlparse, |
| @@ -405,7 +409,7 @@ class FieldStorage: |
| def __init__(self, fp=None, headers=None, outerboundary=b'', |
| environ=os.environ, keep_blank_values=0, strict_parsing=0, |
| limit=None, encoding='utf-8', errors='replace', |
| - max_num_fields=None): |
| + max_num_fields=None, separator='&'): |
| """Constructor. Read multipart/* until last part. |
| |
| Arguments, all optional: |
| @@ -453,6 +457,7 @@ class FieldStorage: |
| self.keep_blank_values = keep_blank_values |
| self.strict_parsing = strict_parsing |
| self.max_num_fields = max_num_fields |
| + self.separator = separator |
| if 'REQUEST_METHOD' in environ: |
| method = environ['REQUEST_METHOD'].upper() |
| self.qs_on_post = None |
| @@ -678,7 +683,7 @@ class FieldStorage: |
| query = urllib.parse.parse_qsl( |
| qs, self.keep_blank_values, self.strict_parsing, |
| encoding=self.encoding, errors=self.errors, |
| - max_num_fields=self.max_num_fields) |
| + max_num_fields=self.max_num_fields, separator=self.separator) |
| self.list = [MiniFieldStorage(key, value) for key, value in query] |
| self.skip_lines() |
| |
| @@ -694,7 +699,7 @@ class FieldStorage: |
| query = urllib.parse.parse_qsl( |
| self.qs_on_post, self.keep_blank_values, self.strict_parsing, |
| encoding=self.encoding, errors=self.errors, |
| - max_num_fields=self.max_num_fields) |
| + max_num_fields=self.max_num_fields, separator=self.separator) |
| self.list.extend(MiniFieldStorage(key, value) for key, value in query) |
| |
| klass = self.FieldStorageClass or self.__class__ |
| @@ -736,7 +741,7 @@ class FieldStorage: |
| |
| part = klass(self.fp, headers, ib, environ, keep_blank_values, |
| strict_parsing,self.limit-self.bytes_read, |
| - self.encoding, self.errors, max_num_fields) |
| + self.encoding, self.errors, max_num_fields, self.separator) |
| |
| if max_num_fields is not None: |
| max_num_fields -= 1 |
| diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py |
| index b3e2d4cce8..b4d5a12eef 100644 |
| --- a/Lib/test/test_cgi.py |
| +++ b/Lib/test/test_cgi.py |
| @@ -55,12 +55,9 @@ parse_strict_test_cases = [ |
| ("", ValueError("bad query field: ''")), |
| ("&", ValueError("bad query field: ''")), |
| ("&&", ValueError("bad query field: ''")), |
| - (";", ValueError("bad query field: ''")), |
| - (";&;", ValueError("bad query field: ''")), |
| # Should the next few really be valid? |
| ("=", {}), |
| ("=&=", {}), |
| - ("=;=", {}), |
| # This rest seem to make sense |
| ("=a", {'': ['a']}), |
| ("&=a", ValueError("bad query field: ''")), |
| @@ -75,8 +72,6 @@ parse_strict_test_cases = [ |
| ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), |
| ("a=a+b&a=b+a", {'a': ['a b', 'b a']}), |
| ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), |
| - ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), |
| - ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), |
| ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env", |
| {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'], |
| 'cuyer': ['r'], |
| @@ -180,6 +175,30 @@ class CgiTests(unittest.TestCase): |
| else: |
| self.assertEqual(fs.getvalue(key), expect_val[0]) |
| |
| + def test_separator(self): |
| + parse_semicolon = [ |
| + ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}), |
| + ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), |
| + (";", ValueError("bad query field: ''")), |
| + (";;", ValueError("bad query field: ''")), |
| + ("=;a", ValueError("bad query field: 'a'")), |
| + (";b=a", ValueError("bad query field: ''")), |
| + ("b;=a", ValueError("bad query field: 'b'")), |
| + ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), |
| + ("a=a+b;a=b+a", {'a': ['a b', 'b a']}), |
| + ] |
| + for orig, expect in parse_semicolon: |
| + env = {'QUERY_STRING': orig} |
| + fs = cgi.FieldStorage(separator=';', environ=env) |
| + if isinstance(expect, dict): |
| + for key in expect.keys(): |
| + expect_val = expect[key] |
| + self.assertIn(key, fs) |
| + if len(expect_val) > 1: |
| + self.assertEqual(fs.getvalue(key), expect_val) |
| + else: |
| + self.assertEqual(fs.getvalue(key), expect_val[0]) |
| + |
| def test_log(self): |
| cgi.log("Testing") |
| |
| diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py |
| index 68f633ca3a..e3088b2f39 100644 |
| --- a/Lib/test/test_urlparse.py |
| +++ b/Lib/test/test_urlparse.py |
| @@ -32,16 +32,10 @@ parse_qsl_test_cases = [ |
| (b"&a=b", [(b'a', b'b')]), |
| (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), |
| (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), |
| - (";", []), |
| - (";;", []), |
| - (";a=b", [('a', 'b')]), |
| - ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), |
| - ("a=1;a=2", [('a', '1'), ('a', '2')]), |
| - (b";", []), |
| - (b";;", []), |
| - (b";a=b", [(b'a', b'b')]), |
| - (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), |
| - (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), |
| + (";a=b", [(';a', 'b')]), |
| + ("a=a+b;b=b+c", [('a', 'a b;b=b c')]), |
| + (b";a=b", [(b';a', b'b')]), |
| + (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), |
| ] |
| |
| # Each parse_qs testcase is a two-tuple that contains |
| @@ -68,16 +62,10 @@ parse_qs_test_cases = [ |
| (b"&a=b", {b'a': [b'b']}), |
| (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), |
| (b"a=1&a=2", {b'a': [b'1', b'2']}), |
| - (";", {}), |
| - (";;", {}), |
| - (";a=b", {'a': ['b']}), |
| - ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), |
| - ("a=1;a=2", {'a': ['1', '2']}), |
| - (b";", {}), |
| - (b";;", {}), |
| - (b";a=b", {b'a': [b'b']}), |
| - (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), |
| - (b"a=1;a=2", {b'a': [b'1', b'2']}), |
| + (";a=b", {';a': ['b']}), |
| + ("a=a+b;b=b+c", {'a': ['a b;b=b c']}), |
| + (b";a=b", {b';a': [b'b']}), |
| + (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}), |
| ] |
| |
| class UrlParseTestCase(unittest.TestCase): |
| @@ -884,10 +872,46 @@ class UrlParseTestCase(unittest.TestCase): |
| def test_parse_qsl_max_num_fields(self): |
| with self.assertRaises(ValueError): |
| urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10) |
| - with self.assertRaises(ValueError): |
| - urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10) |
| urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10) |
| |
| + def test_parse_qs_separator(self): |
| + parse_qs_semicolon_cases = [ |
| + (";", {}), |
| + (";;", {}), |
| + (";a=b", {'a': ['b']}), |
| + ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), |
| + ("a=1;a=2", {'a': ['1', '2']}), |
| + (b";", {}), |
| + (b";;", {}), |
| + (b";a=b", {b'a': [b'b']}), |
| + (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), |
| + (b"a=1;a=2", {b'a': [b'1', b'2']}), |
| + ] |
| + for orig, expect in parse_qs_semicolon_cases: |
| + with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): |
| + result = urllib.parse.parse_qs(orig, separator=';') |
| + self.assertEqual(result, expect, "Error parsing %r" % orig) |
| + |
| + |
| + def test_parse_qsl_separator(self): |
| + parse_qsl_semicolon_cases = [ |
| + (";", []), |
| + (";;", []), |
| + (";a=b", [('a', 'b')]), |
| + ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), |
| + ("a=1;a=2", [('a', '1'), ('a', '2')]), |
| + (b";", []), |
| + (b";;", []), |
| + (b";a=b", [(b'a', b'b')]), |
| + (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), |
| + (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), |
| + ] |
| + for orig, expect in parse_qsl_semicolon_cases: |
| + with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): |
| + result = urllib.parse.parse_qsl(orig, separator=';') |
| + self.assertEqual(result, expect, "Error parsing %r" % orig) |
| + |
| + |
| def test_urlencode_sequences(self): |
| # Other tests incidentally urlencode things; test non-covered cases: |
| # Sequence and object values. |
| diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py |
| index fa8827a9fa..66056bf589 100644 |
| --- a/Lib/urllib/parse.py |
| +++ b/Lib/urllib/parse.py |
| @@ -644,7 +644,7 @@ def unquote(string, encoding='utf-8', errors='replace'): |
| |
| |
| def parse_qs(qs, keep_blank_values=False, strict_parsing=False, |
| - encoding='utf-8', errors='replace', max_num_fields=None): |
| + encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): |
| """Parse a query given as a string argument. |
| |
| Arguments: |
| @@ -668,12 +668,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, |
| max_num_fields: int. If set, then throws a ValueError if there |
| are more than n fields read by parse_qsl(). |
| |
| + separator: str. The symbol to use for separating the query arguments. |
| + Defaults to &. |
| + |
| Returns a dictionary. |
| """ |
| parsed_result = {} |
| pairs = parse_qsl(qs, keep_blank_values, strict_parsing, |
| encoding=encoding, errors=errors, |
| - max_num_fields=max_num_fields) |
| + max_num_fields=max_num_fields, separator=separator) |
| for name, value in pairs: |
| if name in parsed_result: |
| parsed_result[name].append(value) |
| @@ -683,7 +686,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, |
| |
| |
| def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, |
| - encoding='utf-8', errors='replace', max_num_fields=None): |
| + encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): |
| """Parse a query given as a string argument. |
| |
| Arguments: |
| @@ -706,19 +709,25 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, |
| max_num_fields: int. If set, then throws a ValueError |
| if there are more than n fields read by parse_qsl(). |
| |
| + separator: str. The symbol to use for separating the query arguments. |
| + Defaults to &. |
| + |
| Returns a list, as G-d intended. |
| """ |
| qs, _coerce_result = _coerce_args(qs) |
| |
| + if not separator or (not isinstance(separator, (str, bytes))): |
| + raise ValueError("Separator must be of type string or bytes.") |
| + |
| # If max_num_fields is defined then check that the number of fields |
| # is less than max_num_fields. This prevents a memory exhaustion DOS |
| # attack via post bodies with many fields. |
| if max_num_fields is not None: |
| - num_fields = 1 + qs.count('&') + qs.count(';') |
| + num_fields = 1 + qs.count(separator) |
| if max_num_fields < num_fields: |
| raise ValueError('Max number of fields exceeded') |
| |
| - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] |
| + pairs = [s1 for s1 in qs.split(separator)] |
| r = [] |
| for name_value in pairs: |
| if not name_value and not strict_parsing: |
| diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst |
| new file mode 100644 |
| index 0000000000..f08489b414 |
| --- /dev/null |
| +++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst |
| @@ -0,0 +1 @@ |
| +Fix web cache poisoning vulnerability by defaulting the query args separator to ``&``, and allowing the user to choose a custom separator. |
| -- |
| 2.41.0.255.g8b1d071c50-goog |
| |