From ae90dc47521f6047f71befcb3551686cf857208d Mon Sep 17 00:00:00 2001 From: Jey Nandakumar Date: Mon, 10 Feb 2020 16:53:11 +0000 Subject: [PATCH] fix(commons): avoid unicode regex encoding in axe.min.js (#2024) * fix: return unicode as literal instead of a regexp object * run fmt * reformat comments --- lib/commons/text/unicode.js | 52 +++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/lib/commons/text/unicode.js b/lib/commons/text/unicode.js index 2b11476c6d..c854ee6f98 100644 --- a/lib/commons/text/unicode.js +++ b/lib/commons/text/unicode.js @@ -71,31 +71,33 @@ function getUnicodeNonBmpRegExp() { * Regex for matching astral plane unicode * - http://kourge.net/projects/regexp-unicode-block */ - return new RegExp( - '[' + - '\u1D00-\u1D7F' + // Phonetic Extensions - '\u1D80-\u1DBF' + // Phonetic Extensions Supplement - '\u1DC0-\u1DFF' + // Combining Diacritical Marks Supplement - // '\u2000-\u206F' + // General punctuation - handled in -> getPunctuationRegExp - '\u20A0-\u20CF' + // Currency symbols - '\u20D0-\u20FF' + // Combining Diacritical Marks for Symbols - '\u2100-\u214F' + // Letter like symbols - '\u2150-\u218F' + // Number forms (eg: Roman numbers) - '\u2190-\u21FF' + // Arrows - '\u2200-\u22FF' + // Mathematical operators - '\u2300-\u23FF' + // Misc Technical - '\u2400-\u243F' + // Control pictures - '\u2440-\u245F' + // OCR - '\u2460-\u24FF' + // Enclosed alpha numerics - '\u2500-\u257F' + // Box Drawing - '\u2580-\u259F' + // Block Elements - '\u25A0-\u25FF' + // Geometric Shapes - '\u2600-\u26FF' + // Misc Symbols - '\u2700-\u27BF' + // Dingbats - '\uE000-\uF8FF' + // Private Use - ']', - 'g' - ); + + /** + * Notes on various unicode blocks being used in the regex below: + * '\u1D00-\u1D7F' Phonetic Extensions + * '\u1D80-\u1DBF' Phonetic Extensions Supplement + * '\u1DC0-\u1DFF' Combining Diacritical Marks Supplement + * '\u20A0-\u20CF' Currency symbols + * '\u20D0-\u20FF' Combining Diacritical Marks 
for Symbols + * '\u2100-\u214F' Letter like symbols + * '\u2150-\u218F' Number forms (eg: Roman numbers) + * '\u2190-\u21FF' Arrows + * '\u2200-\u22FF' Mathematical operators + * '\u2300-\u23FF' Misc Technical + * '\u2400-\u243F' Control pictures + * '\u2440-\u245F' OCR + * '\u2460-\u24FF' Enclosed alpha numerics + * '\u2500-\u257F' Box Drawing + * '\u2580-\u259F' Block Elements + * '\u25A0-\u25FF' Geometric Shapes + * '\u2600-\u26FF' Misc Symbols + * '\u2700-\u27BF' Dingbats + * '\uE000-\uF8FF' Private Use + * + * Note: block '\u2000-\u206F' used for General punctuation is excluded as it is handled in -> getPunctuationRegExp + */ + + return /[\u1D00-\u1D7F\u1D80-\u1DBF\u1DC0-\u1DFF\u20A0-\u20CF\u20D0-\u20FF\u2100-\u214F\u2150-\u218F\u2190-\u21FF\u2200-\u22FF\u2300-\u23FF\u2400-\u243F\u2440-\u245F\u2460-\u24FF\u2500-\u257F\u2580-\u259F\u25A0-\u25FF\u2600-\u26FF\u2700-\u27BF\uE000-\uF8FF]/g; } /**