diff --git a/number_homoglyphs.py b/number_homoglyphs.py
index 10e248dea29..d422d6389ca 100644
--- a/number_homoglyphs.py
+++ b/number_homoglyphs.py
@@ -10,6 +10,10 @@
 # which is Copyright (c) 2015 Rob Dawson under an MIT license:
 # https://github.com/codebox/homoglyph/blob/master/LICENSE
 
+# Matches things that look like a basic HTML tag: "<", an optional "/", a letter, then anything up to the
+# closing ">". The pattern must not be empty: an empty pattern matches between every pair of characters.
+BASIC_HTML_TAG_REGEX = regex.compile(r'</?[a-zA-Z][^<>]*>')
+
 # Hex numbers are primarily used below, due to the possibility of the characters becoming corrupted when the file
 # is edited in editors which don't fully support Unicode, or even just on different operating systems.
 equivalents = {
@@ -144,6 +148,10 @@
 
 
 def normalize(text):
+    """Replace basic HTML tags in text with spaces (only when len(text) > 3), then apply translate_table."""
+    if len(text) > 3:
+        # Replace things that look like basic HTML tags with a space.
+        text = BASIC_HTML_TAG_REGEX.sub(' ', text)
     return text.translate(translate_table)
 
 