Skip to content

Commit

Permalink
[3.12] pythongh-124452: Fix header mismatches when folding/unfolding …
Browse files Browse the repository at this point in the history
…with email message (pythonGH-125919) (python#126916)

pythongh-124452: Fix header mismatches when folding/unfolding with email message (pythonGH-125919)

The header-folder of the new email API has a long-standing known buglet where
if the first token is longer than max_line_length, it puts that token on the next
line.  It turns out there is also a *parsing* bug when parsing such a header:
the space prefixing that first, non-empty line gets preserved and tacked on to
the start of the header value, which is not the expected behavior per the RFCs.
The bug arises from the fact that the parser assumed that there would be at
least one token on the line with the header name, which is going to be true for
probably every email producer other than the Python email library with its
folding buglet.  Clearly, though, this is a case that needs to be handled
correctly.  The fix is simple: strip the blanks off the start of the whole
value, not just the first physical line of the value.

(cherry picked from commit ed81971)

Co-authored-by: RanKKI <hliu86.me@gmail.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
  • Loading branch information
4 people authored Nov 17, 2024
1 parent 50e42b9 commit 9d986d9
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 6 deletions.
4 changes: 2 additions & 2 deletions Lib/email/_policybase.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,12 +302,12 @@ def header_source_parse(self, sourcelines):
"""+
The name is parsed as everything up to the ':' and returned unmodified.
The value is determined by stripping leading whitespace off the
remainder of the first line, joining all subsequent lines together, and
remainder of the first line joined with all subsequent lines, and
stripping any trailing carriage return or linefeed characters.
"""
name, value = sourcelines[0].split(':', 1)
value = value.lstrip(' \t') + ''.join(sourcelines[1:])
value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n')
return (name, value.rstrip('\r\n'))

def header_store_parse(self, name, value):
Expand Down
4 changes: 2 additions & 2 deletions Lib/email/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,13 @@ def header_source_parse(self, sourcelines):
"""+
The name is parsed as everything up to the ':' and returned unmodified.
The value is determined by stripping leading whitespace off the
remainder of the first line, joining all subsequent lines together, and
remainder of the first line joined with all subsequent lines, and
stripping any trailing carriage return or linefeed characters. (This
is the same as Compat32).
"""
name, value = sourcelines[0].split(':', 1)
value = value.lstrip(' \t') + ''.join(sourcelines[1:])
value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n')
return (name, value.rstrip('\r\n'))

def header_store_parse(self, name, value):
Expand Down
50 changes: 48 additions & 2 deletions Lib/test/test_email/test_message.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
import textwrap
from email import policy, message_from_string
import unittest
from email import message_from_bytes, message_from_string, policy
from email.message import EmailMessage, MIMEPart
from test.test_email import TestEmailBase, parameterize

Expand Down Expand Up @@ -958,6 +958,52 @@ def test_folding_with_utf8_encoding_8(self):
b'123456789-123456789\n 123456789 Hello '
b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')

def test_folding_with_short_nospace_1(self):
# Round-trip check for a short (27-character) header value that
# contains no whitespace: serialize the message to bytes, parse the
# bytes back, and require the parsed header to equal the original.
#
# NOTE(review): the previous comment cited bpo-36520 and described
# folding "a line that contains a long whitespace after the fold
# point", which does not match this body (the value has no
# whitespace) -- confirm which issue this test belongs to.

m = EmailMessage(policy.default)
m['Message-ID'] = '123456789' * 3
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])

def test_folding_with_long_nospace_default_policy_1(self):
# Regression test for https://github.com/python/cpython/issues/124452
#
# A 90-character value with no whitespace is serialized on a
# continuation line of its own.  Parsing that output must give back
# the original value, without the continuation line's leading space.

m = EmailMessage(policy.default)
message = '123456789' * 10
m['Message-ID'] = message
# Pin the serialized form: nothing follows the colon on the header
# line, and the whole value sits on the next line after one space.
self.assertEqual(m.as_bytes(),
f'Message-ID:\n {message}\n\n'.encode())
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])

def test_folding_with_long_nospace_compat32_policy_1(self):
# Serialize a 90-character, whitespace-free Message-ID with
# policy.compat32, then parse the bytes with policy.default; the
# parsed header must equal the value that was set.
m = EmailMessage(policy.compat32)
message = '123456789' * 10
m['Message-ID'] = message
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])

def test_folding_with_long_nospace_smtp_policy_1(self):
# Serialize a 90-character, whitespace-free Message-ID with
# policy.SMTP, then parse the bytes with policy.default; the parsed
# header must equal the value that was set.
m = EmailMessage(policy.SMTP)
message = '123456789' * 10
m['Message-ID'] = message
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])

def test_folding_with_long_nospace_http_policy_1(self):
# Serialize a 90-character, whitespace-free Message-ID with
# policy.HTTP, then parse the bytes with policy.default; the parsed
# header must equal the value that was set.
m = EmailMessage(policy.HTTP)
message = '123456789' * 10
m['Message-ID'] = message
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])

def test_get_body_malformed(self):
"""test for bpo-42892"""
msg = textwrap.dedent("""\
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix an issue in :meth:`email.policy.EmailPolicy.header_source_parse` and
:meth:`email.policy.Compat32.header_source_parse` that introduced spurious
leading whitespace into header values when the header includes a newline
character after the header name delimiter (``:``) and before the value.

0 comments on commit 9d986d9

Please sign in to comment.