From bec1ce23e6a85b9422bedf6a32911ff2939720af Mon Sep 17 00:00:00 2001
From: Tom Most
Date: Mon, 1 Jan 2024 20:53:41 -0800
Subject: [PATCH] Reject empty quoted charset

---
 src/treq/content.py           |  6 ++++--
 src/treq/test/test_content.py | 11 ++++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/treq/content.py b/src/treq/content.py
index 06118f14..67f109f6 100644
--- a/src/treq/content.py
+++ b/src/treq/content.py
@@ -16,7 +16,7 @@
 See https://www.rfc-editor.org/errata/eid5433
 """
 
-_MIME_CHARSET_CHARS: Final[str] = (
+_MIME_CHARSET_CHARS: Final[frozenset[str]] = frozenset(
     "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"  # ALPHA
     "0123456789"  # DIGIT
     "!#$%&+-^_`~"  # symbols
@@ -35,7 +35,9 @@ def _encoding_from_headers(headers: Headers) -> Optional[str]:
     charset = params.get("charset")
     if charset:
         charset = charset.strip("'\"").lower()
-        if any(c not in _MIME_CHARSET_CHARS for c in charset):
+        if not charset:
+            return None
+        if not set(charset).issubset(_MIME_CHARSET_CHARS):
             return None
         return charset
 
diff --git a/src/treq/test/test_content.py b/src/treq/test/test_content.py
index 60835814..162ba558 100644
--- a/src/treq/test/test_content.py
+++ b/src/treq/test/test_content.py
@@ -317,6 +317,11 @@ def test_quotedString(self):
 
     def test_noCharset(self):
         """None is returned when no valid charset parameter is found."""
-        self.assertIsNone(self._encodingFromContentType("application/octet-stream"))
-        self.assertIsNone(self._encodingFromContentType("text/plain;charset="))
-        self.assertIsNone(self._encodingFromContentType("text/plain;charset=🙃"))
+        for example in [
+            "application/octet-stream",
+            "text/plain;charset=",
+            "text/plain;charset=''",
+            "text/plain;charset=\"'\"",
+            "text/plain;charset=🙃",
+        ]:
+            self.assertIsNone(self._encodingFromContentType(example))