From b8170e58849a7728ebc9b58e222aa5bd051cf7c4 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sun, 19 Jan 2025 22:07:39 +0100 Subject: [PATCH] [3.12] gh-80222: Fix email address header folding with long quoted-string (GH-122753) (#129008) gh-80222: Fix email address header folding with long quoted-string (GH-122753) Email generators using email.policy.default could incorrectly omit the quote ('"') characters from a quoted-string during header refolding, leading to invalid address headers and enabling header spoofing. This change restores the quote characters on a bare-quoted-string as the header is refolded, and escapes backslash and quote chars in the string. (cherry picked from commit 5aaf41685834901e4ed0a40f4c055b92991a0bb5) Co-authored-by: Mike Edmunds --- Lib/email/_header_value_parser.py | 19 +++++++++++- .../test_email/test__header_value_parser.py | 31 +++++++++++++++++-- ...4-08-06-11-43-08.gh-issue-80222.wfR4BU.rst | 6 ++++ 3 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index ec2215a5e5f33c..3d845c09d415f6 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -95,8 +95,16 @@ NLSET = {'\n', '\r'} SPECIALSNL = SPECIALS | NLSET + +def make_quoted_pairs(value): + """Escape dquote and backslash for use within a quoted-string.""" + return str(value).replace('\\', '\\\\').replace('"', '\\"') + + def quote_string(value): - return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' + escaped = make_quoted_pairs(value) + return f'"{escaped}"' + # Match a RFC 2047 word, looks like =?utf-8?q?someword?= rfc2047_matcher = re.compile(r''' @@ -2905,6 +2913,15 @@ def _refold_parse_tree(parse_tree, *, policy): if not hasattr(part, 'encode'): # It's not a terminal, try folding the subparts. newparts = list(part) + if part.token_type == 'bare-quoted-string': + # To fold a quoted string we need to create a list of terminal + # tokens that will render the leading and trailing quotes + # and use quoted pairs in the value as appropriate. + newparts = ( + [ValueTerminal('"', 'ptext')] + + [ValueTerminal(make_quoted_pairs(p), 'ptext') + for p in newparts] + + [ValueTerminal('"', 'ptext')]) if not part.as_ew_allowed: wrap_as_ew_blocked += 1 newparts.append(end_ew_not_allowed) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 5413319a414a62..efd1695711d7c1 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -3082,13 +3082,40 @@ def test_address_list_with_list_separator_after_fold(self): self._test(parser.get_address_list(to)[0], f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus \n') - a = '.' * 79 + a = '.' * 79 # ('.' is a special, so must be in quoted-string.) to = f'"{a}" , "Hübsch Kaktus" ' self._test(parser.get_address_list(to)[0], - f'{a}\n' + f'"{a}"\n' ' , =?utf-8?q?H=C3=BCbsch?= Kaktus ' '\n') + def test_address_list_with_specials_in_long_quoted_string(self): + # Regression for gh-80222. + policy = self.policy.clone(max_line_length=40) + cases = [ + # (to, folded) + ('"Exfiltrator (unclosed comment?" ', + '"Exfiltrator (unclosed\n' + ' comment?" \n'), + ('"Escaped \\" chars \\\\ in quoted-string stay escaped" ', + '"Escaped \\" chars \\\\ in quoted-string\n' + ' stay escaped" \n'), + ('This long display name does not need quotes ', + 'This long display name does not need\n' + ' quotes \n'), + ('"Quotes are not required but are retained here" ', + '"Quotes are not required but are\n' + ' retained here" \n'), + ('"A quoted-string, it can be a valid local-part"@example.com', + '"A quoted-string, it can be a valid\n' + ' local-part"@example.com\n'), + ('"local-part-with-specials@but-no-fws.cannot-fold"@example.com', + '"local-part-with-specials@but-no-fws.cannot-fold"@example.com\n'), + ] + for (to, folded) in cases: + with self.subTest(to=to): + self._test(parser.get_address_list(to)[0], folded, policy=policy) + # XXX Need tests with comments on various sides of a unicode token, # and with unicode tokens in the comments. Spaces inside the quotes # currently don't do the right thing. diff --git a/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst new file mode 100644 index 00000000000000..0f0661d0b1cf4a --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst @@ -0,0 +1,6 @@ +Fix bug in the folding of quoted strings when flattening an email message using +a modern email policy. Previously when a quoted string was folded so that +it spanned more than one line, the surrounding quotes and internal escapes +would be omitted. This could theoretically be used to spoof header lines +using a carefully constructed quoted string if the resulting rendered email +was transmitted or re-parsed.