| From f68d2d69f1da56c2aea1293ecf93ab69a6010ad7 Mon Sep 17 00:00:00 2001 |
| From: "Miss Islington (bot)" |
| <31488909+miss-islington@users.noreply.github.com> |
| Date: Thu, 6 May 2021 10:05:37 -0700 |
| Subject: [PATCH] bpo-44022: Fix http client infinite line reading (DoS) after |
| a HTTP 100 Continue (GH-25916) (GH-25935) |
| |
| Fixes http.client potential denial of service where it could get stuck reading lines from a malicious server after a 100 Continue response. |
| |
| Co-authored-by: Gregory P. Smith <greg@krypto.org> |
| (cherry picked from commit 47895e31b6f626bc6ce47d175fe9d43c1098909d) |
| |
| Co-authored-by: Gen Xu <xgbarry@gmail.com> |
| --- |
| Lib/http/client.py | 38 ++++++++++--------- |
| Lib/test/test_httplib.py | 10 ++++- |
| .../2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst | 2 + |
| 3 files changed, 32 insertions(+), 18 deletions(-) |
| create mode 100644 Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst |
| |
| diff --git a/Lib/http/client.py b/Lib/http/client.py |
| index 53581eca20..07e675fac5 100644 |
| --- a/Lib/http/client.py |
| +++ b/Lib/http/client.py |
| @@ -205,15 +205,11 @@ class HTTPMessage(email.message.Message): |
| lst.append(line) |
| return lst |
| |
| -def parse_headers(fp, _class=HTTPMessage): |
| - """Parses only RFC2822 headers from a file pointer. |
| - |
| - email Parser wants to see strings rather than bytes. |
| - But a TextIOWrapper around self.rfile would buffer too many bytes |
| - from the stream, bytes which we later need to read as bytes. |
| - So we read the correct bytes here, as bytes, for email Parser |
| - to parse. |
| +def _read_headers(fp): |
| + """Reads potential header lines into a list from a file pointer. |
| |
| + Length of line is limited by _MAXLINE, and number of |
| + headers is limited by _MAXHEADERS. |
| """ |
| headers = [] |
| while True: |
| @@ -225,6 +221,19 @@ def parse_headers(fp, _class=HTTPMessage): |
| raise HTTPException("got more than %d headers" % _MAXHEADERS) |
| if line in (b'\r\n', b'\n', b''): |
| break |
| + return headers |
| + |
| +def parse_headers(fp, _class=HTTPMessage): |
| + """Parses only RFC2822 headers from a file pointer. |
| + |
| + email Parser wants to see strings rather than bytes. |
| + But a TextIOWrapper around self.rfile would buffer too many bytes |
| + from the stream, bytes which we later need to read as bytes. |
| + So we read the correct bytes here, as bytes, for email Parser |
| + to parse. |
| + |
| + """ |
| + headers = _read_headers(fp) |
| hstring = b''.join(headers).decode('iso-8859-1') |
| return email.parser.Parser(_class=_class).parsestr(hstring) |
| |
| @@ -312,15 +321,10 @@ class HTTPResponse(io.BufferedIOBase): |
| if status != CONTINUE: |
| break |
| # skip the header from the 100 response |
| - while True: |
| - skip = self.fp.readline(_MAXLINE + 1) |
| - if len(skip) > _MAXLINE: |
| - raise LineTooLong("header line") |
| - skip = skip.strip() |
| - if not skip: |
| - break |
| - if self.debuglevel > 0: |
| - print("header:", skip) |
| + skipped_headers = _read_headers(self.fp) |
| + if self.debuglevel > 0: |
| + print("headers:", skipped_headers) |
| + del skipped_headers |
| |
| self.code = self.status = status |
| self.reason = reason.strip() |
| diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py |
| index 03e049b13f..0db287507c 100644 |
| --- a/Lib/test/test_httplib.py |
| +++ b/Lib/test/test_httplib.py |
| @@ -971,6 +971,14 @@ class BasicTest(TestCase): |
| resp = client.HTTPResponse(FakeSocket(body)) |
| self.assertRaises(client.LineTooLong, resp.begin) |
| |
| + def test_overflowing_header_limit_after_100(self): |
| + body = ( |
| + 'HTTP/1.1 100 OK\r\n' |
| + 'r\n' * 32768 |
| + ) |
| + resp = client.HTTPResponse(FakeSocket(body)) |
| + self.assertRaises(client.HTTPException, resp.begin) |
| + |
| def test_overflowing_chunked_line(self): |
| body = ( |
| 'HTTP/1.1 200 OK\r\n' |
| @@ -1377,7 +1385,7 @@ class Readliner: |
| class OfflineTest(TestCase): |
| def test_all(self): |
| # Documented objects defined in the module should be in __all__ |
| - expected = {"responses"} # White-list documented dict() object |
| + expected = {"responses"} # Allowlist documented dict() object |
| # HTTPMessage, parse_headers(), and the HTTP status code constants are |
| # intentionally omitted for simplicity |
| blacklist = {"HTTPMessage", "parse_headers"} |
| diff --git a/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst |
| new file mode 100644 |
| index 0000000000..cf6b63e396 |
| --- /dev/null |
| +++ b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst |
| @@ -0,0 +1,2 @@ |
| +mod:`http.client` now avoids infinitely reading potential HTTP headers after a |
| +``100 Continue`` status response from the server. |
| -- |
| 2.41.0.255.g8b1d071c50-goog |
| |