python · gpshead · Nov 8, 2022 · Nov 4, 2022 · Nov 4, 2022 · Nov 4, 2022
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
@@ -39,23 +39,21 @@ def nameprep(label):
 
     # Check bidi
     RandAL = [stringprep.in_table_d1(x) for x in label]
-    for c in RandAL:
-        if c:
-            # There is a RandAL char in the string. Must perform further
-            # tests:
-            # 1) The characters in section 5.8 MUST be prohibited.
-            # This is table C.8, which was already checked
-            # 2) If a string contains any RandALCat character, the string
-            # MUST NOT contain any LCat character.
-            if any(stringprep.in_table_d2(x) for x in label):
-                raise UnicodeError("Violation of BIDI requirement 2")
-
-            # 3) If a string contains any RandALCat character, a
-            # RandALCat character MUST be the first character of the
-            # string, and a RandALCat character MUST be the last
-            # character of the string.
-            if not RandAL[0] or not RandAL[-1]:
-                raise UnicodeError("Violation of BIDI requirement 3")
+    if any(RandAL):
+        # There is a RandAL char in the string. Must perform further
+        # tests:
+        # 1) The characters in section 5.8 MUST be prohibited.
+        # This is table C.8, which was already checked
+        # 2) If a string contains any RandALCat character, the string
+        # MUST NOT contain any LCat character.
+        if any(stringprep.in_table_d2(x) for x in label):
+            raise UnicodeError("Violation of BIDI requirement 2")
+        # 3) If a string contains any RandALCat character, a
+        # RandALCat character MUST be the first character of the
+        # string, and a RandALCat character MUST be the last
+        # character of the string.
+        if not RandAL[0] or not RandAL[-1]:
+            raise UnicodeError("Violation of BIDI requirement 3")
 
     return label
 
@@ -103,6 +101,16 @@ def ToASCII(label):
     raise UnicodeError("label empty or too long")
 
 def ToUnicode(label):
+    if len(label) > 1024:
+        # Protection from https://github.com/python/cpython/issues/98433.
+        # https://datatracker.ietf.org/doc/html/rfc5894#section-6
+        # doesn't specify a label size limit prior to NAMEPREP. But having
+        # one makes practical sense.
+        # This leaves ample room for nameprep() to remove Nothing characters
+        # per https://www.rfc-editor.org/rfc/rfc3454#section-3.1 while still
+        # preventing us from wasting time decoding a big thing that'll just
+        # hit the actual <= 63 length limit in Step 6.
+        raise UnicodeError("label way too long")
     # Step 1: Check for ASCII
     if isinstance(label, bytes):
         pure_ascii = True

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -1552,6 +1552,12 @@ def test_builtin_encode(self):
         self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
         self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
 
+    def test_builtin_decode_length_limit(self):
+        with self.assertRaisesRegex(UnicodeError, "way too long"):
+            (b"xn--016c"+b"a"*1100).decode("idna")
+        with self.assertRaisesRegex(UnicodeError, "too long"):
+            (b"xn--016c"+b"a"*70).decode("idna")
+
     def test_stream(self):
         r = codecs.getreader("idna")(io.BytesIO(b"abc"))
         r.read(3)

diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
@@ -0,0 +1,14 @@
+The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio`
+related name resolution functions no longer involves a quadratic algorithm.
+This prevents a potential CPU denial of service if an out-of-spec excessive
+length hostname involving bidirectional characters were decoded. Some protocols
+such as :mod:`urllib` http ``3xx`` redirects potentially allow for an attacker
+to supply such a name.
+
+Individual labels within an IDNA encoded DNS name will now raise an error early
+during IDNA decoding if they are longer than 1024 unicode characters given that
+each decoded DNS label must be 63 or fewer characters and the entire decoded
+DNS name is limited to 255. Only an application presenting a hostname or label
+consisting primarily of :rfc:`3454` section 3.1 "Nothing" characters to be
+removed would run into of this new limit. See also :rfc:`5894` section 6 and
+:rfc:`3491`.