Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-80222: Fix email address header folding with long quoted-string #122753

Merged
merged 4 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,16 @@
NLSET = {'\n', '\r'}
SPECIALSNL = SPECIALS | NLSET


def make_quoted_pairs(value):
    """Escape dquote and backslash for use within a quoted-string.

    *value* is converted with str().  Every backslash is replaced by two
    backslashes and every double quote by a backslash followed by a double
    quote.  The surrounding double quotes are NOT added; only the escaped
    text is returned.
    """
    # Backslashes must be doubled first: if the quotes were escaped first,
    # the backslashes introduced for them would be doubled a second time.
    return str(value).replace('\\', '\\\\').replace('"', '\\"')


def quote_string(value):
    """Return *value* rendered as a double-quoted string.

    The str() form of *value* is escaped with make_quoted_pairs() (backslash
    and double quote become quoted pairs) and the result is wrapped in a
    leading and a trailing double quote.
    """
    escaped = make_quoted_pairs(value)
    return f'"{escaped}"'


# Match a RFC 2047 word, looks like =?utf-8?q?someword?=
rfc2047_matcher = re.compile(r'''
Expand Down Expand Up @@ -2905,6 +2913,15 @@ def _refold_parse_tree(parse_tree, *, policy):
if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
newparts = list(part)
if part.token_type == 'bare-quoted-string':
# To fold a quoted string we need to create a list of terminal
# tokens that will render the leading and trailing quotes
# and use quoted pairs in the value as appropriate.
newparts = (
[ValueTerminal('"', 'ptext')] +
[ValueTerminal(make_quoted_pairs(p), 'ptext')
for p in newparts] +
[ValueTerminal('"', 'ptext')])
if not part.as_ew_allowed:
wrap_as_ew_blocked += 1
newparts.append(end_ew_not_allowed)
Expand Down
31 changes: 29 additions & 2 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3082,13 +3082,40 @@ def test_address_list_with_list_separator_after_fold(self):
self._test(parser.get_address_list(to)[0],
f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>\n')

a = '.' * 79
a = '.' * 79 # ('.' is a special, so must be in quoted-string.)
to = f'"{a}" <xyz@example.com>, "Hübsch Kaktus" <beautiful@example.com>'
self._test(parser.get_address_list(to)[0],
f'{a}\n'
f'"{a}"\n'
' <xyz@example.com>, =?utf-8?q?H=C3=BCbsch?= Kaktus '
'<beautiful@example.com>\n')

def test_address_list_with_specials_in_long_quoted_string(self):
    # Regression for gh-80222.
    # Each input is folded with a 40-character line limit and the result is
    # compared with the expected text: folding a quoted-string must keep its
    # surrounding quotes and its quoted pairs.
    policy = self.policy.clone(max_line_length=40)
    cases = [
        # (to, folded)
        # Specials ('<', '>', '(') inside the quoted display name.
        ('"Exfiltrator <spy@example.org> (unclosed comment?" <to@example.com>',
         '"Exfiltrator <spy@example.org> (unclosed\n'
         ' comment?" <to@example.com>\n'),
        # Escaped dquote and backslash inside the quoted display name.
        ('"Escaped \\" chars \\\\ in quoted-string stay escaped" <to@example.com>',
         '"Escaped \\" chars \\\\ in quoted-string\n'
         ' stay escaped" <to@example.com>\n'),
        # Unquoted display name.
        ('This long display name does not need quotes <to@example.com>',
         'This long display name does not need\n'
         ' quotes <to@example.com>\n'),
        # Quoted display name without any specials.
        ('"Quotes are not required but are retained here" <to@example.com>',
         '"Quotes are not required but are\n'
         ' retained here" <to@example.com>\n'),
        # Quoted-string used as the local-part, containing whitespace.
        ('"A quoted-string, it can be a valid local-part"@example.com',
         '"A quoted-string, it can be a valid\n'
         ' local-part"@example.com\n'),
        # Quoted-string local-part without whitespace: expected unfolded
        # even though it is longer than the line limit.
        ('"local-part-with-specials@but-no-fws.cannot-fold"@example.com',
         '"local-part-with-specials@but-no-fws.cannot-fold"@example.com\n'),
    ]
    for (to, folded) in cases:
        # subTest reports the failing input without stopping the other cases.
        with self.subTest(to=to):
            self._test(parser.get_address_list(to)[0], folded, policy=policy)

# XXX Need tests with comments on various sides of a unicode token,
# and with unicode tokens in the comments. Spaces inside the quotes
# currently don't do the right thing.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Fix a bug in the folding of quoted strings when flattening an email message using
a modern email policy. Previously, when a quoted string was folded so that
it spanned more than one line, the surrounding quotes and internal escapes
would be omitted. This could theoretically be used to spoof header lines
using a carefully constructed quoted string if the resulting rendered email
was transmitted or re-parsed.
Loading