From 365a6cb2e43b51a2a0fa80b8b21f489f3a397532 Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith [Google]" <greg@krypto.org>
Date: Fri, 4 Nov 2022 09:29:46 +0000
Subject: [PATCH] gh-98433: Fix quadratic time idna decoding.

There was an unnecessary quadratic loop in idna decoding. This restores
the behavior to linear.

An early length check would still be a good idea given that DNS IDNA
label names cannot be more than 63 ASCII characters.
---
 Lib/encodings/idna.py                            |  3 ++-
 Lib/test/test_codecs.py                          | 16 ++++++++++++++++
 ...2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst |  3 +++
 3 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst

diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index ea4058512fe366..54ed582909e068 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -39,6 +39,7 @@ def nameprep(label):
 
     # Check bidi
     RandAL = [stringprep.in_table_d1(x) for x in label]
+    any_in_table_d2 = any(stringprep.in_table_d2(x) for x in label)
     for c in RandAL:
         if c:
             # There is a RandAL char in the string. Must perform further
@@ -47,7 +48,7 @@ def nameprep(label):
             # This is table C.8, which was already checked
             # 2) If a string contains any RandALCat character, the string
             # MUST NOT contain any LCat character.
-            if any(stringprep.in_table_d2(x) for x in label):
+            if any_in_table_d2:
                 raise UnicodeError("Violation of BIDI requirement 2")
 
             # 3) If a string contains any RandALCat character, a
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 32a704f4e97e41..0929736a573a9a 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3,6 +3,7 @@
 import io
 import locale
 import sys
+import time
 import unittest
 import encodings
 from unittest import mock
@@ -1552,6 +1553,21 @@ def test_builtin_encode(self):
         self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
         self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
 
+    def test_builtin_decode_length_limit(self):
+        get_time = time.process_time
+        if get_time() <= 0:  # some platforms like WASM lack process_time()
+            get_time = time.monotonic
+        # This was slow prior to GH-98433's quadratic loop being fixed.
+        # Before: 12s on a rpi4 --with-pydebug. After: 0.12s
+        with self.assertRaises(UnicodeError) as ctx:
+            start = get_time()
+            (b"xn--016c"+b"a"*1000).decode("idna")
+        seconds_to_decode_idna_length_fail = get_time() - start
+        self.assertIn("too long", str(ctx.exception))
+        self.assertLess(
+                elapsed_seconds, 4,
+                msg="idna decoding length failure took waaaay too long")
+
     def test_stream(self):
         r = codecs.getreader("idna")(io.BytesIO(b"abc"))
         r.read(3)
diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
new file mode 100644
index 00000000000000..290a8680f3ef88
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
@@ -0,0 +1,3 @@
+The IDNA codec decoder used on DNS hostnames no longer involves a quadratic
+algorithm. This prevents a potential CPU denial of service if an out-of-spec
+excessive length hostname involving bidirectional characters is decoded.