From 365a6cb2e43b51a2a0fa80b8b21f489f3a397532 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" <greg@krypto.org> Date: Fri, 4 Nov 2022 09:29:46 +0000 Subject: [PATCH] gh-98433: Fix quadratic time idna decoding. There was an unnecessary quadratic loop in idna decoding. This restores the behavior to linear. An early length check would still be a good idea given that DNS IDNA label names cannot be more than 63 ASCII characters. --- Lib/encodings/idna.py | 3 ++- Lib/test/test_codecs.py | 16 ++++++++++++++++ ...2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst | 3 +++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index ea4058512fe366..54ed582909e068 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -39,6 +39,7 @@ def nameprep(label): # Check bidi RandAL = [stringprep.in_table_d1(x) for x in label] + any_in_table_d2 = any(stringprep.in_table_d2(x) for x in label) for c in RandAL: if c: # There is a RandAL char in the string. Must perform further @@ -47,7 +48,7 @@ def nameprep(label): # This is table C.8, which was already checked # 2) If a string contains any RandALCat character, the string # MUST NOT contain any LCat character. - if any(stringprep.in_table_d2(x) for x in label): + if any_in_table_d2: raise UnicodeError("Violation of BIDI requirement 2") # 3) If a string contains any RandALCat character, a diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 32a704f4e97e41..0929736a573a9a 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -3,6 +3,7 @@ import io import locale import sys +import time import unittest import encodings from unittest import mock @@ -1552,6 +1553,21 @@ def test_builtin_encode(self): self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org") self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.") + def test_builtin_decode_length_limit(self): + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + # This was slow prior to GH-98433's quadratic loop being fixed. + # Before: 12s on a rpi4 --with-pydebug. After: 0.12s + with self.assertRaises(UnicodeError) as ctx: + start = get_time() + (b"xn--016c"+b"a"*1000).decode("idna") + seconds_to_decode_idna_length_fail = get_time() - start + self.assertIn("too long", str(ctx.exception)) + self.assertLess( + elapsed_seconds, 4, + msg="idna decoding length failure took waaaay too long") + def test_stream(self): r = codecs.getreader("idna")(io.BytesIO(b"abc")) r.read(3) diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst new file mode 100644 index 00000000000000..290a8680f3ef88 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst @@ -0,0 +1,3 @@ +The IDNA codec decoder used on DNS hostnames no longer involves a quadratic +algorithm. This prevents a potential CPU denial of service if an out-of-spec +excessive length hostname involving bidirectional characters is decoded.