Skip to content

Commit

Permalink
gh-98433: Fix quadratic time idna decoding.
Browse files Browse the repository at this point in the history
There was an unnecessary quadratic loop in idna decoding. This change
restores linear-time behavior.

An early length check would still be a good idea given that DNS IDNA
label names cannot be more than 63 ASCII characters.
  • Loading branch information
gpshead committed Nov 4, 2022
1 parent 016c7d3 commit 365a6cb
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 1 deletion.
3 changes: 2 additions & 1 deletion Lib/encodings/idna.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def nameprep(label):

# Check bidi
RandAL = [stringprep.in_table_d1(x) for x in label]
any_in_table_d2 = any(stringprep.in_table_d2(x) for x in label)
for c in RandAL:
if c:
# There is a RandAL char in the string. Must perform further
Expand All @@ -47,7 +48,7 @@ def nameprep(label):
# This is table C.8, which was already checked
# 2) If a string contains any RandALCat character, the string
# MUST NOT contain any LCat character.
if any(stringprep.in_table_d2(x) for x in label):
if any_in_table_d2:
raise UnicodeError("Violation of BIDI requirement 2")

# 3) If a string contains any RandALCat character, a
Expand Down
16 changes: 16 additions & 0 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import io
import locale
import sys
import time
import unittest
import encodings
from unittest import mock
Expand Down Expand Up @@ -1552,6 +1553,21 @@ def test_builtin_encode(self):
self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")

def test_builtin_decode_length_limit(self):
get_time = time.process_time
if get_time() <= 0: # some platforms like WASM lack process_time()
get_time = time.monotonic
# This was slow prior to GH-98433's quadratic loop being fixed.
# Before: 12s on a rpi4 --with-pydebug. After: 0.12s
with self.assertRaises(UnicodeError) as ctx:
start = get_time()
(b"xn--016c"+b"a"*1000).decode("idna")
seconds_to_decode_idna_length_fail = get_time() - start
self.assertIn("too long", str(ctx.exception))
self.assertLess(
elapsed_seconds, 4,
msg="idna decoding length failure took waaaay too long")

def test_stream(self):
r = codecs.getreader("idna")(io.BytesIO(b"abc"))
r.read(3)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The IDNA codec decoder used on DNS hostnames no longer involves a quadratic
algorithm. This prevents a potential CPU denial of service if an out-of-spec
excessive length hostname involving bidirectional characters is decoded.

0 comments on commit 365a6cb

Please sign in to comment.