From b8170e58849a7728ebc9b58e222aa5bd051cf7c4 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
 <31488909+miss-islington@users.noreply.github.com>
Date: Sun, 19 Jan 2025 22:07:39 +0100
Subject: [PATCH] [3.12] gh-80222: Fix email address header folding with long
 quoted-string (GH-122753) (#129008)

gh-80222: Fix email address header folding with long quoted-string (GH-122753)

Email generators using email.policy.default could incorrectly omit the
quote ('"') characters from a quoted-string during header refolding,
leading to invalid address headers and enabling header spoofing. This
change restores the quote characters on a bare-quoted-string as the
header is refolded, and escapes backslash and quote chars in the string.
(cherry picked from commit 5aaf41685834901e4ed0a40f4c055b92991a0bb5)

Co-authored-by: Mike Edmunds <medmunds@gmail.com>
---
 Lib/email/_header_value_parser.py             | 19 +++++++++++-
 .../test_email/test__header_value_parser.py   | 31 +++++++++++++++++--
 ...4-08-06-11-43-08.gh-issue-80222.wfR4BU.rst |  6 ++++
 3 files changed, 53 insertions(+), 3 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index ec2215a5e5f33c..3d845c09d415f6 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -95,8 +95,16 @@
 NLSET = {'\n', '\r'}
 SPECIALSNL = SPECIALS | NLSET
 
+
+def make_quoted_pairs(value):
+    """Escape dquote and backslash for use within a quoted-string."""
+    return str(value).replace('\\', '\\\\').replace('"', '\\"')
+
+
 def quote_string(value):
-    return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+    escaped = make_quoted_pairs(value)
+    return f'"{escaped}"'
+
 
 # Match a RFC 2047 word, looks like =?utf-8?q?someword?=
 rfc2047_matcher = re.compile(r'''
@@ -2905,6 +2913,15 @@ def _refold_parse_tree(parse_tree, *, policy):
         if not hasattr(part, 'encode'):
             # It's not a terminal, try folding the subparts.
             newparts = list(part)
+            if part.token_type == 'bare-quoted-string':
+                # To fold a quoted string we need to create a list of terminal
+                # tokens that will render the leading and trailing quotes
+                # and use quoted pairs in the value as appropriate.
+                newparts = (
+                    [ValueTerminal('"', 'ptext')] +
+                    [ValueTerminal(make_quoted_pairs(p), 'ptext')
+                     for p in newparts] +
+                    [ValueTerminal('"', 'ptext')])
             if not part.as_ew_allowed:
                 wrap_as_ew_blocked += 1
                 newparts.append(end_ew_not_allowed)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 5413319a414a62..efd1695711d7c1 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -3082,13 +3082,40 @@ def test_address_list_with_list_separator_after_fold(self):
         self._test(parser.get_address_list(to)[0],
             f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>\n')
 
-        a = '.' * 79
+        a = '.' * 79  # ('.' is a special, so must be in quoted-string.)
         to = f'"{a}" <xyz@example.com>, "Hübsch Kaktus" <beautiful@example.com>'
         self._test(parser.get_address_list(to)[0],
-            f'{a}\n'
+            f'"{a}"\n'
             ' <xyz@example.com>, =?utf-8?q?H=C3=BCbsch?= Kaktus '
             '<beautiful@example.com>\n')
 
+    def test_address_list_with_specials_in_long_quoted_string(self):
+        # Regression for gh-80222.
+        policy = self.policy.clone(max_line_length=40)
+        cases = [
+            # (to, folded)
+            ('"Exfiltrator <spy@example.org> (unclosed comment?" <to@example.com>',
+             '"Exfiltrator <spy@example.org> (unclosed\n'
+             ' comment?" <to@example.com>\n'),
+            ('"Escaped \\" chars \\\\ in quoted-string stay escaped" <to@example.com>',
+             '"Escaped \\" chars \\\\ in quoted-string\n'
+             ' stay escaped" <to@example.com>\n'),
+            ('This long display name does not need quotes <to@example.com>',
+             'This long display name does not need\n'
+             ' quotes <to@example.com>\n'),
+            ('"Quotes are not required but are retained here" <to@example.com>',
+             '"Quotes are not required but are\n'
+             ' retained here" <to@example.com>\n'),
+            ('"A quoted-string, it can be a valid local-part"@example.com',
+             '"A quoted-string, it can be a valid\n'
+             ' local-part"@example.com\n'),
+            ('"local-part-with-specials@but-no-fws.cannot-fold"@example.com',
+             '"local-part-with-specials@but-no-fws.cannot-fold"@example.com\n'),
+        ]
+        for (to, folded) in cases:
+            with self.subTest(to=to):
+                self._test(parser.get_address_list(to)[0], folded, policy=policy)
+
     # XXX Need tests with comments on various sides of a unicode token,
     # and with unicode tokens in the comments.  Spaces inside the quotes
     # currently don't do the right thing.
diff --git a/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
new file mode 100644
index 00000000000000..0f0661d0b1cf4a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
@@ -0,0 +1,6 @@
+Fix bug in the folding of quoted strings when flattening an email message using
+a modern email policy. Previously when a quoted string was folded so that
+it spanned more than one line, the surrounding quotes and internal escapes
+would be omitted. This could theoretically be used to spoof header lines
+using a carefully constructed quoted string if the resulting rendered email
+was transmitted or re-parsed.