Skip to content

Commit

Permalink
Refactor to improve performance w/ weak map, hoisted regex
Browse files Browse the repository at this point in the history
Closes GH-13.

Reviewed-by: Titus Wormer <tituswormer@gmail.com>
  • Loading branch information
bluwy authored Apr 3, 2024
1 parent 040c003 commit 2649911
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 13 deletions.
47 changes: 37 additions & 10 deletions lib/core.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* @typedef CoreOptions
* @property {Array<string>} [subset=[]]
* @property {ReadonlyArray<string>} [subset=[]]
* Whether to only escape the given subset of characters.
* @property {boolean} [escapeOnly=false]
* Whether to only escape possibly dangerous characters.
Expand All @@ -13,6 +13,16 @@
* @typedef {CoreOptions & FormatOptions & import('./util/format-smart.js').FormatSmartOptions} CoreWithFormatOptions
*/

// Expressions hoisted to module scope so they are compiled once instead of on
// every call. The `g`-flagged ones are only passed to `String#replace` in the
// visible code, which starts matching from index 0 on each call, so sharing
// them does not leak `lastIndex` state between calls.

// Characters encoded by `core` when no `subset` option is given:
// `"`, `&`, `'`, `<`, `>`, and the backtick.
const defaultSubsetRegex = /["&'<>`]/g
// A high surrogate (U+D800–U+DBFF) immediately followed by a low surrogate
// (U+DC00–U+DFFF), i.e. one astral character stored as two UTF-16 code units.
const surrogatePairsRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g
// C0 controls other than NUL, LF, and CR; DEL; a handful of C1 code points
// (U+0081, U+008D, U+008F, U+0090, U+009D); and everything from U+00A0
// through U+FFFF.
const controlCharactersRegex =
// eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
/[\x01-\t\v\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g
// Characters with special meaning in regex syntax; each match is prefixed
// with a backslash before a subset entry is joined into a pattern.
const regexEscapeRegex = /[|\\{}()[\]^$+*?.]/g

// Maps a `subset` array (by object identity) to the expression compiled from
// it. Being a `WeakMap`, an entry does not keep its array alive.
/** @type {WeakMap<ReadonlyArray<string>, RegExp>} */
const subsetToRegexCache = new WeakMap()

/**
* Encode certain characters in `value`.
*
Expand All @@ -22,7 +32,9 @@
*/
export function core(value, options) {
value = value.replace(
options.subset ? charactersToExpression(options.subset) : /["&'<>`]/g,
options.subset
? charactersToExpressionCached(options.subset)
: defaultSubsetRegex,
basic
)

Expand All @@ -33,14 +45,10 @@ export function core(value, options) {
return (
value
// Surrogate pairs.
.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g, surrogate)
.replace(surrogatePairsRegex, surrogate)
// BMP control characters (C0 except for LF, CR, SP; DEL; and some more
// non-ASCII ones).
.replace(
// eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
/[\x01-\t\v\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g,
basic
)
.replace(controlCharactersRegex, basic)
)

/**
Expand Down Expand Up @@ -74,7 +82,26 @@ export function core(value, options) {
}

/**
* @param {Array<string>} subset
* A wrapper around `charactersToExpression` that caches its result in a
* `WeakMap` keyed by the `subset` array itself (by identity, not by contents).
* This avoids recompiling the expression when tooling repeatedly passes the
* same array instance.
*
* @param {ReadonlyArray<string>} subset
* @returns {RegExp}
*/
function charactersToExpressionCached(subset) {
  // Fast path: this exact array instance was already compiled.
  const known = subsetToRegexCache.get(subset)

  if (known) {
    return known
  }

  // First time this array is seen: compile once and remember the result so
  // later calls with the same instance skip the work.
  const expression = charactersToExpression(subset)
  subsetToRegexCache.set(subset, expression)

  return expression
}

/**
* @param {ReadonlyArray<string>} subset
* @returns {RegExp}
*/
function charactersToExpression(subset) {
Expand All @@ -83,7 +110,7 @@ function charactersToExpression(subset) {
let index = -1

while (++index < subset.length) {
groups.push(subset[index].replace(/[|\\{}()[\]^$+*?.]/g, '\\$&'))
groups.push(subset[index].replace(regexEscapeRegex, '\\$&'))
}

return new RegExp('(?:' + groups.join('|') + ')', 'g')
Expand Down
4 changes: 3 additions & 1 deletion lib/util/to-decimal.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
const decimalRegex = /\d/

/**
* Configurable ways to encode characters as decimal references.
*
Expand All @@ -8,7 +10,7 @@
*/
export function toDecimal(code, next, omit) {
const value = '&#' + String(code)
return omit && next && !/\d/.test(String.fromCharCode(next))
return omit && next && !decimalRegex.test(String.fromCharCode(next))
? value
: value + ';'
}
4 changes: 3 additions & 1 deletion lib/util/to-hexadecimal.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
const hexadecimalRegex = /[\dA-Fa-f]/

/**
* Configurable ways to encode characters as hexadecimal references.
*
Expand All @@ -8,7 +10,7 @@
*/
export function toHexadecimal(code, next, omit) {
const value = '&#x' + code.toString(16).toUpperCase()
return omit && next && !/[\dA-Fa-f]/.test(String.fromCharCode(next))
return omit && next && !hexadecimalRegex.test(String.fromCharCode(next))
? value
: value + ';'
}
4 changes: 3 additions & 1 deletion lib/util/to-named.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ for (key in characterEntitiesHtml4) {
}
}

const notAlphanumericRegex = /[^\dA-Za-z]/

/**
* Configurable ways to encode characters as named references.
*
Expand All @@ -43,7 +45,7 @@ export function toNamed(code, next, omit, attribute) {
(!attribute ||
(next &&
next !== 61 /* `=` */ &&
/[^\da-z]/i.test(String.fromCharCode(next))))
notAlphanumericRegex.test(String.fromCharCode(next))))
) {
return value
}
Expand Down

0 comments on commit 2649911

Please sign in to comment.