Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix numerous message parsing issues. #3059

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions gunicorn/http/body.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,9 @@ def parse_chunk_size(self, unreader, data=None):
line, rest_chunk = data[:idx], data[idx + 2:]

chunk_size = line.split(b";", 1)[0].strip()
try:
chunk_size = int(chunk_size, 16)
except ValueError:
if any(n not in b"0123456789abcdefABCDEF" for n in chunk_size):
raise InvalidChunkSize(chunk_size)
chunk_size = int(chunk_size, 16)

if chunk_size == 0:
try:
Expand Down
38 changes: 17 additions & 21 deletions gunicorn/http/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
MAX_HEADERS = 32768
DEFAULT_MAX_HEADERFIELD_SIZE = 8190

HEADER_RE = re.compile(r"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\"]")
METH_RE = re.compile(r"[A-Z0-9$-_.]{3,20}")
TOKEN_RE = re.compile(r"[!#$%&'*+\-.\^_`|~0-9a-zA-Z]+")
VERSION_RE = re.compile(r"HTTP/(\d+)\.(\d+)")


Expand Down Expand Up @@ -63,8 +62,8 @@ def parse_headers(self, data):
cfg = self.cfg
headers = []

# Split lines on \r\n keeping the \r\n on each line
lines = [bytes_to_str(line) + "\r\n" for line in data.split(b"\r\n")]
# Split lines on \r\n
lines = [bytes_to_str(line) for line in data.split(b"\r\n")]

# handle scheme headers
scheme_header = False
Expand All @@ -80,30 +79,27 @@ def parse_headers(self, data):
if len(headers) >= self.limit_request_fields:
raise LimitRequestHeaders("limit request headers fields")

# Parse initial header name : value pair.
# Parse initial header name: value pair.
curr = lines.pop(0)
header_length = len(curr)
if curr.find(":") < 0:
raise InvalidHeader(curr.strip())
header_length = len(curr) + len("\r\n")
if curr.find(":") <= 0:
raise InvalidHeader(curr)
name, value = curr.split(":", 1)
if self.cfg.strip_header_spaces:
name = name.rstrip(" \t").upper()
else:
name = name.upper()
if HEADER_RE.search(name):
if not TOKEN_RE.fullmatch(name):
raise InvalidHeaderName(name)

name, value = name.strip(), [value.lstrip()]
value = [value.lstrip(" \t")]

# Consume value continuation lines
while lines and lines[0].startswith((" ", "\t")):
curr = lines.pop(0)
header_length += len(curr)
if header_length > self.limit_request_field_size > 0:
benoitc marked this conversation as resolved.
Show resolved Hide resolved
raise LimitRequestHeaders("limit request headers "
"fields size")
value.append(curr)
value = ''.join(value).rstrip()
header_length += len(curr) + len("\r\n")
value.append(curr.strip("\t "))
value = " ".join(value)

if header_length > self.limit_request_field_size > 0:
raise LimitRequestHeaders("limit request headers fields size")
Expand Down Expand Up @@ -156,7 +152,7 @@ def set_body_reader(self):
def should_close(self):
for (h, v) in self.headers:
if h == "CONNECTION":
v = v.lower().strip()
v = v.lower().strip(" \t")
if v == "close":
return True
elif v == "keep-alive":
Expand Down Expand Up @@ -283,7 +279,7 @@ def proxy_protocol_access_check(self):
raise ForbiddenProxyRequest(self.peer_addr[0])

def parse_proxy_protocol(self, line):
bits = line.split()
bits = line.split(" ")

if len(bits) != 6:
raise InvalidProxyLine(line)
Expand Down Expand Up @@ -328,12 +324,12 @@ def parse_proxy_protocol(self, line):
}

def parse_request_line(self, line_bytes):
bits = [bytes_to_str(bit) for bit in line_bytes.split(None, 2)]
bits = [bytes_to_str(bit) for bit in line_bytes.split(b" ", 2)]
if len(bits) != 3:
raise InvalidRequestLine(bytes_to_str(line_bytes))

# Method
if not METH_RE.match(bits[0]):
if not TOKEN_RE.fullmatch(bits[0]):
raise InvalidRequestMethod(bits[0])
self.method = bits[0].upper()

Expand All @@ -349,7 +345,7 @@ def parse_request_line(self, line_bytes):
self.fragment = parts.fragment or ""

# Version
match = VERSION_RE.match(bits[2])
match = VERSION_RE.fullmatch(bits[2])
if match is None:
raise InvalidHTTPVersion(bits[2])
self.version = (int(match.group(1)), int(match.group(2)))
Expand Down
20 changes: 10 additions & 10 deletions gunicorn/http/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import re
import sys

from gunicorn.http.message import HEADER_RE
from gunicorn.http.message import TOKEN_RE
from gunicorn.http.errors import InvalidHeader, InvalidHeaderName
from gunicorn import SERVER_SOFTWARE, SERVER
from gunicorn import util
Expand All @@ -18,7 +18,7 @@
# with sending files in blocks over 2GB.
BLKSIZE = 0x3FFFFFFF

HEADER_VALUE_RE = re.compile(r'[\x00-\x1F\x7F]')
HEADER_VALUE_RE = re.compile(r'[ \t\x21-\x7e\x80-\xff]*')

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -249,31 +249,31 @@ def process_headers(self, headers):
if not isinstance(name, str):
raise TypeError('%r is not a string' % name)

if HEADER_RE.search(name):
if not TOKEN_RE.fullmatch(name):
raise InvalidHeaderName('%r' % name)

if not isinstance(value, str):
raise TypeError('%r is not a string' % value)

if HEADER_VALUE_RE.search(value):
if not HEADER_VALUE_RE.fullmatch(value):
raise InvalidHeader('%r' % value)

value = value.strip()
lname = name.lower().strip()
value = value.strip(" \t")
lname = name.lower()
if lname == "content-length":
self.response_length = int(value)
elif util.is_hoppish(name):
if lname == "connection":
# handle websocket
if value.lower().strip() == "upgrade":
if value.lower() == "upgrade":
self.upgrade = True
elif lname == "upgrade":
if value.lower().strip() == "websocket":
self.headers.append((name.strip(), value))
if value.lower() == "websocket":
self.headers.append((name, value))

# ignore hopbyhop headers
continue
self.headers.append((name.strip(), value))
self.headers.append((name, value))

def is_chunked(self):
# Only use chunked responses when the client is
Expand Down
4 changes: 2 additions & 2 deletions tests/requests/invalid/003.http
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
-blargh /foo HTTP/1.1\r\n
\r\n
GET\n/\nHTTP/1.1\r\n
\r\n
4 changes: 2 additions & 2 deletions tests/requests/invalid/003.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from gunicorn.http.errors import InvalidRequestMethod
request = InvalidRequestMethod
from gunicorn.http.errors import InvalidRequestLine
request = InvalidRequestLine
64 changes: 32 additions & 32 deletions tests/requests/valid/016.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,35 @@
certificate = """-----BEGIN CERTIFICATE-----\r\n
MIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n
ETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n
AkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n
dWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n
SzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n
BAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n
BQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n
W51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n
gW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n
0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n
u2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n
wgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n
1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n
BglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n
VR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n
loCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n
aWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n
9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n
IjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n
BgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n
cHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4Qg\r\n
EDBDAWLmh0dHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC\r\n
5jcmwwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n
Y3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n
XCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n
UO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n
hTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n
wTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n
Yhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n
RA==\r\n
-----END CERTIFICATE-----""".replace("\n\n", "\n")
certificate = """-----BEGIN CERTIFICATE-----
MIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx
ETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT
AkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu
dWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV
SzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV
BAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB
BQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF
W51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR
gW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL
0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP
u2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR
wgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG
1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs
BglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD
VR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj
loCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj
aWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG
9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE
IjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO
BgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1
cHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4Qg
EDBDAWLmh0dHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC
5jcmwwPwYDVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv
Y3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3
XCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8
UO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk
hTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK
wTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu
Yhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3
RA==
-----END CERTIFICATE-----""".replace("\n", "")

request = {
"method": "GET",
Expand Down
2 changes: 2 additions & 0 deletions tests/requests/valid/031.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-blargh /foo HTTP/1.1\r\n
\r\n
7 changes: 7 additions & 0 deletions tests/requests/valid/031.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
request = {
"method": "-BLARGH",
"uri": uri("/foo"),
"version": (1, 1),
"headers": [],
"body": b""
}