From 010a482c97fc0a4aa82360b2e31500fe65e4276b Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Thu, 3 Jun 2021 10:47:43 +0200 Subject: [PATCH 1/9] WIP --- README.md | 1 + docs/rules/README.md | 1 + docs/rules/sort-alternatives.md | 39 ++++++ lib/rules/sort-alternatives.ts | 199 +++++++++++++++++++++++++++ lib/utils/rules.ts | 2 + tests/lib/rules/sort-alternatives.ts | 28 ++++ 6 files changed, 270 insertions(+) create mode 100644 docs/rules/sort-alternatives.md create mode 100644 lib/rules/sort-alternatives.ts create mode 100644 tests/lib/rules/sort-alternatives.ts diff --git a/README.md b/README.md index eea17df00..f875b1458 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco | [regexp/prefer-range](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-range.html) | enforce using character class range | :wrench: | | [regexp/prefer-regexp-exec](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-regexp-exec.html) | enforce that `RegExp#exec` is used instead of `String#match` if no global flag is provided | | | [regexp/prefer-regexp-test](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-regexp-test.html) | enforce that `RegExp#test` is used instead of `String#match` and `RegExp#exec` | :wrench: | +| [regexp/sort-alternatives](https://ota-meshi.github.io/eslint-plugin-regexp/rules/sort-alternatives.html) | sort alternatives if order doesn't matter | :wrench: | ### Stylistic Issues diff --git a/docs/rules/README.md b/docs/rules/README.md index 80442805f..fd71ccb7b 100644 --- a/docs/rules/README.md +++ b/docs/rules/README.md @@ -60,6 +60,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco | [regexp/prefer-range](./prefer-range.md) | enforce using character class range | :wrench: | | [regexp/prefer-regexp-exec](./prefer-regexp-exec.md) | enforce that `RegExp#exec` is used instead of `String#match` if no global flag is provided | | | [regexp/prefer-regexp-test](./prefer-regexp-test.md) | enforce that `RegExp#test` is used instead of `String#match` and `RegExp#exec` | :wrench: | +| [regexp/sort-alternatives](./sort-alternatives.md) | sort alternatives if order doesn't matter | :wrench: | ### Stylistic Issues diff --git a/docs/rules/sort-alternatives.md b/docs/rules/sort-alternatives.md new file mode 100644 index 000000000..18bda7d4b --- /dev/null +++ b/docs/rules/sort-alternatives.md @@ -0,0 +1,39 @@ +--- +pageClass: "rule-details" +sidebarDepth: 0 +title: "regexp/sort-alternatives" +description: "sort alternatives if order doesn't matter" +--- +# regexp/sort-alternatives + +> sort alternatives if order doesn't matter + +- :exclamation: ***This rule has not been released yet.*** +- :wrench: The `--fix` option on the [command line](https://eslint.org/docs/user-guide/command-line-interface#fixing-problems) can automatically fix some of the problems reported by this rule. + +## :book: Rule Details + +This rule reports ???. + + + +```js +/* eslint regexp/sort-alternatives: "error" */ + +/* ✓ GOOD */ + + +/* ✗ BAD */ + +``` + + + +## :wrench: Options + +Nothing. + +## :mag: Implementation + +- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/sort-alternatives.ts) +- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/sort-alternatives.ts) diff --git a/lib/rules/sort-alternatives.ts b/lib/rules/sort-alternatives.ts new file mode 100644 index 000000000..d246a7de1 --- /dev/null +++ b/lib/rules/sort-alternatives.ts @@ -0,0 +1,199 @@ +import type { RegExpVisitor } from "regexpp/visitor" +import type { Alternative, CapturingGroup, Element, Group } from "regexpp/ast" +import type { RegExpContext } from "../utils" +import { createRule, defineRegexpVisitor } from "../utils" +import { + Chars, + getFirstCharAfter, + getFirstConsumedChar, + hasSomeDescendant, + isEmptyBackreference, +} from "regexp-ast-analysis" +import type { CharRange, CharSet } from "refa" + +/** + * Returns the union of all characters that can possibly be consumed by the + * given element. + */ +function getConsumedChars( + element: Element | Alternative, + context: RegExpContext, +): CharSet { + const ranges: CharRange[] = [] + + // we misuse hasSomeDescendant to iterate all relevant elements + hasSomeDescendant( + element, + (d) => { + if ( + d.type === "Character" || + d.type === "CharacterClass" || + d.type === "CharacterSet" + ) { + ranges.push(...context.toCharSet(d).ranges) + } else if (d.type === "Backreference" && !isEmptyBackreference(d)) { + ranges.push(...getConsumedChars(d.resolved, context).ranges) + } + + // always continue to the next element + return false + }, + // don't go into assertions + (d) => d.type !== "Assertion" && d.type !== "CharacterClass", + ) + + return Chars.empty(context.flags).union(ranges) +} + +/** + * Assuming that the given group only consumes the given characters, this will + * return whether the alternatives of the group can be reordered freely without + * affecting the behavior of the regex. + * + * This also assumes that the alternatives of the given group do not contain + * capturing group in such a way that their order matters. + */ +function canReorder( + group: Group | CapturingGroup, + consumedChars: CharSet, + context: RegExpContext, +): boolean { + if (group.parent.type !== "Alternative") { + return false + } + + const parentElements = group.parent.elements + const groupIndex = parentElements.indexOf(group) + + let boundaryBefore = parentElements[groupIndex - 1]?.raw === "\\b" + let boundaryAfter = parentElements[groupIndex + 1]?.raw === "\\b" + + if (boundaryBefore && boundaryAfter) { + return true + } + + const words = Chars.word(context.flags) + + if (!boundaryBefore) { + const { char } = getFirstCharAfter(group, "rtl", context.flags) + boundaryBefore = char.isDisjointWith(words) + } + if (!boundaryAfter) { + const { char } = getFirstCharAfter(group, "ltr", context.flags) + boundaryAfter = char.isDisjointWith(words) + } + + return boundaryBefore && boundaryAfter +} + +/** + * Sorts the given alternatives. + */ +function sortAlternatives( + alternatives: Alternative[], + context: RegExpContext, +): void { + const firstChars = new Map() + for (const a of alternatives) { + const chars = getFirstConsumedChar(a, "ltr", context.flags) + const char = + chars.empty || chars.char.isEmpty + ? Infinity + : chars.char.ranges[0].min + firstChars.set(a, char) + } + + alternatives.sort((a, b) => { + const firstA = firstChars.get(a)! + const firstB = firstChars.get(b)! + if (firstA !== firstB) { + return firstA - firstB + } + + if (context.flags.ignoreCase) { + return a.raw.toUpperCase().localeCompare(b.raw.toUpperCase()) + } + + return a.raw.localeCompare(b.raw) + }) +} + +export default createRule("sort-alternatives", { + meta: { + docs: { + description: "sort alternatives if order doesn't matter", + category: "Best Practices", + recommended: false, + }, + fixable: "code", + schema: [], + messages: { + sort: + "The alternatives of this group can be sorted without affecting the regex.", + }, + type: "suggestion", // "problem", + }, + create(context) { + /** + * Create visitor + */ + function createVisitor( + regexpContext: RegExpContext, + ): RegExpVisitor.Handlers { + const { node, getRegexpLocation, fixReplaceNode } = regexpContext + + /** */ + function onGroup(group: Group | CapturingGroup): void { + if (group.alternatives.length < 2) { + return + } + + const consumedChars = getConsumedChars(group, regexpContext) + if (!canReorder(group, consumedChars, regexpContext)) { + return + } + + const alternatives = [...group.alternatives] + sortAlternatives(alternatives, regexpContext) + + const reordered = alternatives.some( + (a, i) => group.alternatives[i] !== a, + ) + + if (reordered) { + context.report({ + node, + loc: getRegexpLocation(group), + messageId: "sort", + fix: fixReplaceNode(group, () => { + const prefix = group.raw.slice( + 0, + group.alternatives[0].start - group.start, + ) + const suffix = group.raw.slice( + group.alternatives[ + group.alternatives.length - 1 + ].end - group.start, + ) + + return ( + prefix + + alternatives.map((a) => a.raw).join("|") + + suffix + ) + }), + }) + } + } + + return { + onGroupEnter: onGroup, + onCapturingGroupEnter: onGroup, + } + } + + return defineRegexpVisitor(context, { + createVisitor, + }) + }, +}) diff --git a/lib/utils/rules.ts b/lib/utils/rules.ts index 0bc71862a..065690c34 100644 --- a/lib/utils/rules.ts +++ b/lib/utils/rules.ts @@ -56,6 +56,7 @@ import preferStarQuantifier from "../rules/prefer-star-quantifier" import preferT from "../rules/prefer-t" import preferUnicodeCodepointEscapes from "../rules/prefer-unicode-codepoint-escapes" import preferW from "../rules/prefer-w" +import sortAlternatives from "../rules/sort-alternatives" import sortFlags from "../rules/sort-flags" import unicodeEscape from "../rules/unicode-escape" @@ -117,6 +118,7 @@ export const rules = [ preferT, preferUnicodeCodepointEscapes, preferW, + sortAlternatives, sortFlags, unicodeEscape, ] as RuleModule[] diff --git a/tests/lib/rules/sort-alternatives.ts b/tests/lib/rules/sort-alternatives.ts new file mode 100644 index 000000000..d34af503c --- /dev/null +++ b/tests/lib/rules/sort-alternatives.ts @@ -0,0 +1,28 @@ +import { RuleTester } from "eslint" +import rule from "../../../lib/rules/sort-alternatives" + +const tester = new RuleTester({ + parserOptions: { + ecmaVersion: 2020, + sourceType: "module", + }, +}) + +tester.run("sort-alternatives", rule as any, { + valid: [`/regexp/`], + invalid: [ + { + code: `/regexp/`, + errors: [ + { + messageId: "", + data: {}, + line: 1, + column: 1, + endLine: 1, + endColumn: 1, + }, + ], + }, + ], +}) From 8a2a052919154da832e64d3bb539c11a67130137 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Thu, 3 Jun 2021 15:41:25 +0200 Subject: [PATCH 2/9] WIP --- lib/rules/sort-alternatives.ts | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/lib/rules/sort-alternatives.ts b/lib/rules/sort-alternatives.ts index d246a7de1..a0ac2fdfa 100644 --- a/lib/rules/sort-alternatives.ts +++ b/lib/rules/sort-alternatives.ts @@ -58,32 +58,14 @@ function canReorder( consumedChars: CharSet, context: RegExpContext, ): boolean { - if (group.parent.type !== "Alternative") { - return false - } - - const parentElements = group.parent.elements - const groupIndex = parentElements.indexOf(group) - - let boundaryBefore = parentElements[groupIndex - 1]?.raw === "\\b" - let boundaryAfter = parentElements[groupIndex + 1]?.raw === "\\b" - - if (boundaryBefore && boundaryAfter) { - return true - } - - const words = Chars.word(context.flags) - - if (!boundaryBefore) { - const { char } = getFirstCharAfter(group, "rtl", context.flags) - boundaryBefore = char.isDisjointWith(words) - } - if (!boundaryAfter) { - const { char } = getFirstCharAfter(group, "ltr", context.flags) - boundaryAfter = char.isDisjointWith(words) - } - - return boundaryBefore && boundaryAfter + return ( + getFirstCharAfter(group, "rtl", context.flags).char.isDisjointWith( + consumedChars, + ) && + getFirstCharAfter(group, "ltr", context.flags).char.isDisjointWith( + consumedChars, + ) + ) } /** From 1908592e56a7b72e0ef980ae1e8af493765a1f8e Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Thu, 3 Jun 2021 15:46:40 +0200 Subject: [PATCH 3/9] Update to `regexp-ast-analysis@0.2.2` --- package-lock.json | 6 +++--- package.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/package-lock.json b/package-lock.json index 70a9c4420..c10372d92 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12116,9 +12116,9 @@ } }, "regexp-ast-analysis": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/regexp-ast-analysis/-/regexp-ast-analysis-0.2.0.tgz", - "integrity": "sha512-ZVNwBF65Gn4CbRdPNYle8NAPFSDbbJ83svYUk4Mpqmr1QTUx2M06my9x7o8Hw/oFXDwXrJo/7W+ijLfFM5oG2Q==", + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/regexp-ast-analysis/-/regexp-ast-analysis-0.2.2.tgz", + "integrity": "sha512-inLIbwizLLAZkpC9/8zp8CeSkJJPsRaqXGDVpxEbNRZD10E+OP6vRDHt7OS9JjWECQLvOI2EmHx7DgdpyAvdrQ==", "requires": { "refa": "^0.8.0", "regexpp": "^3.1.0" diff --git a/package.json b/package.json index 83fecb5ba..2f6695971 100644 --- a/package.json +++ b/package.json @@ -89,7 +89,7 @@ "eslint-utils": "^3.0.0", "jsdoctypeparser": "^9.0.0", "refa": "^0.8.0", - "regexp-ast-analysis": "^0.2.0", + "regexp-ast-analysis": "^0.2.2", "regexpp": "^3.1.0" } } From 8116d1ffbb41b3b2d84c7fd1a839d0940baa9fe8 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Fri, 4 Jun 2021 17:52:51 +0200 Subject: [PATCH 4/9] Improved sorting for numbers --- lib/rules/sort-alternatives.ts | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/lib/rules/sort-alternatives.ts b/lib/rules/sort-alternatives.ts index a0ac2fdfa..3c2050ddc 100644 --- a/lib/rules/sort-alternatives.ts +++ b/lib/rules/sort-alternatives.ts @@ -100,6 +100,28 @@ function sortAlternatives( }) } +/** + * This tries to sort the given alternatives by assuming that all alternatives + * are a number. + */ +function trySortNumberAlternatives(alternatives: Alternative[]): boolean { + const numbers = new Map() + for (const a of alternatives) { + const { raw } = a + if (/^\d+$/.test(raw)) { + numbers.set(a, Number(raw)) + } else { + return false + } + } + + alternatives.sort((a, b) => { + return numbers.get(a)! - numbers.get(b)! + }) + + return true +} + export default createRule("sort-alternatives", { meta: { docs: { @@ -131,12 +153,20 @@ export default createRule("sort-alternatives", { } const consumedChars = getConsumedChars(group, regexpContext) + if (consumedChars.isEmpty) { + // all alternatives are either empty or only contain + // assertions + return + } + if (!canReorder(group, consumedChars, regexpContext)) { return } const alternatives = [...group.alternatives] - sortAlternatives(alternatives, regexpContext) + if (!trySortNumberAlternatives(alternatives)) { + sortAlternatives(alternatives, regexpContext) + } const reordered = alternatives.some( (a, i) => group.alternatives[i] !== a, From 8b7e33764c6844f46739e267c5115af9bb59bd19 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 8 Jun 2021 13:34:32 +0200 Subject: [PATCH 5/9] Minor refactoring --- lib/rules/sort-alternatives.ts | 98 ++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 29 deletions(-) diff --git a/lib/rules/sort-alternatives.ts b/lib/rules/sort-alternatives.ts index 3c2050ddc..bcd168264 100644 --- a/lib/rules/sort-alternatives.ts +++ b/lib/rules/sort-alternatives.ts @@ -10,6 +10,7 @@ import { isEmptyBackreference, } from "regexp-ast-analysis" import type { CharRange, CharSet } from "refa" +import type { SourceLocation, Position } from "estree" /** * Returns the union of all characters that can possibly be consumed by the @@ -122,6 +123,26 @@ function trySortNumberAlternatives(alternatives: Alternative[]): boolean { return true } +/** + * Returns the combined source location of the two given locations. + */ +function unionLocations(a: SourceLocation, b: SourceLocation): SourceLocation { + /** x < y */ + function less(x: Position, y: Position): boolean { + if (x.line < y.line) { + return true + } else if (x.line > y.line) { + return false + } + return x.column < y.column + } + + return { + start: { ...(less(a.start, b.start) ? a.start : b.start) }, + end: { ...(less(a.end, b.end) ? b.end : a.end) }, + } +} + export default createRule("sort-alternatives", { meta: { docs: { @@ -146,7 +167,53 @@ export default createRule("sort-alternatives", { ): RegExpVisitor.Handlers { const { node, getRegexpLocation, fixReplaceNode } = regexpContext - /** */ + /** The handler for groups */ + function enforceSorted(sorted: Alternative[]): void { + const parent = sorted[0].parent + const unsorted = parent.alternatives + + const firstChanged = unsorted.findIndex( + (a, i) => a !== unsorted[i], + ) + + if (firstChanged === -1) { + // unsorted === sorted + return + } + + const lastChanged = [...unsorted] + .reverse() + .findIndex( + (a, i) => a !== unsorted[unsorted.length - 1 - i], + ) + + const loc = unionLocations( + getRegexpLocation(unsorted[firstChanged]), + getRegexpLocation(unsorted[lastChanged]), + ) + + context.report({ + node, + loc, + messageId: "sort", + fix: fixReplaceNode(parent, () => { + const prefix = parent.raw.slice( + 0, + parent.alternatives[0].start - parent.start, + ) + const suffix = parent.raw.slice( + parent.alternatives[parent.alternatives.length - 1] + .end - parent.start, + ) + + return ( + prefix + sorted.map((a) => a.raw).join("|") + suffix + ) + }), + }) + } + + /** The handler for groups */ function onGroup(group: Group | CapturingGroup): void { if (group.alternatives.length < 2) { return @@ -168,34 +235,7 @@ export default createRule("sort-alternatives", { sortAlternatives(alternatives, regexpContext) } - const reordered = alternatives.some( - (a, i) => group.alternatives[i] !== a, - ) - - if (reordered) { - context.report({ - node, - loc: getRegexpLocation(group), - messageId: "sort", - fix: fixReplaceNode(group, () => { - const prefix = group.raw.slice( - 0, - group.alternatives[0].start - group.start, - ) - const suffix = group.raw.slice( - group.alternatives[ - group.alternatives.length - 1 - ].end - group.start, - ) - - return ( - prefix + - alternatives.map((a) => a.raw).join("|") + - suffix - ) - }), - }) - } + enforceSorted(alternatives) } return { From ac22cc0a8011532c52eaa7f64b5f33dd49e8c796 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 8 Jun 2021 16:30:13 +0200 Subject: [PATCH 6/9] Finished work --- lib/rules/sort-alternatives.ts | 227 +++++++++++++++++++++------ tests/lib/rules/sort-alternatives.ts | 71 +++++++-- 2 files changed, 244 insertions(+), 54 deletions(-) diff --git a/lib/rules/sort-alternatives.ts b/lib/rules/sort-alternatives.ts index bcd168264..97b86c8f9 100644 --- a/lib/rules/sort-alternatives.ts +++ b/lib/rules/sort-alternatives.ts @@ -1,8 +1,17 @@ import type { RegExpVisitor } from "regexpp/visitor" -import type { Alternative, CapturingGroup, Element, Group } from "regexpp/ast" +import type { + Alternative, + CapturingGroup, + Element, + Group, + LookaroundAssertion, + Pattern, +} from "regexpp/ast" import type { RegExpContext } from "../utils" -import { createRule, defineRegexpVisitor } from "../utils" +import { CP_MINUS, CP_SPACE, createRule, defineRegexpVisitor } from "../utils" +import type { ReadonlyFlags } from "regexp-ast-analysis" import { + getLengthRange, Chars, getFirstCharAfter, getFirstConsumedChar, @@ -10,14 +19,43 @@ import { isEmptyBackreference, } from "regexp-ast-analysis" import type { CharRange, CharSet } from "refa" +import { JS } from "refa" import type { SourceLocation, Position } from "estree" +interface AllowedChars { + allowed: CharSet + required: CharSet +} +const cache = new Map>() + +/** */ +function getAllowedChars(flags: ReadonlyFlags) { + const cacheKey = (flags.ignoreCase ? "i" : "") + (flags.unicode ? "u" : "") + let result = cache.get(cacheKey) + if (result === undefined) { + result = { + allowed: JS.createCharSet( + [ + { kind: "word", negate: false }, + { min: CP_SPACE, max: CP_SPACE }, + { min: CP_MINUS, max: CP_MINUS }, + { min: CP_MINUS, max: CP_MINUS }, + ], + flags, + ), + required: Chars.word(flags), + } + cache.set(cacheKey, result) + } + return result +} + /** * Returns the union of all characters that can possibly be consumed by the * given element. */ function getConsumedChars( - element: Element | Alternative, + element: Element | Pattern | Alternative, context: RegExpContext, ): CharSet { const ranges: CharRange[] = [] @@ -46,6 +84,8 @@ function getConsumedChars( return Chars.empty(context.flags).union(ranges) } +type Parent = Group | CapturingGroup | Pattern | LookaroundAssertion + /** * Assuming that the given group only consumes the given characters, this will * return whether the alternatives of the group can be reordered freely without @@ -55,20 +95,50 @@ function getConsumedChars( * capturing group in such a way that their order matters. */ function canReorder( - group: Group | CapturingGroup, + parent: Parent, consumedChars: CharSet, context: RegExpContext, ): boolean { + const lengthRange = getLengthRange(parent.alternatives) + if (lengthRange && lengthRange.min === lengthRange.max) { + return true + } + + if (parent.type === "Pattern" || parent.type === "Assertion") { + return false + } + return ( - getFirstCharAfter(group, "rtl", context.flags).char.isDisjointWith( + getFirstCharAfter(parent, "rtl", context.flags).char.isDisjointWith( consumedChars, ) && - getFirstCharAfter(group, "ltr", context.flags).char.isDisjointWith( + getFirstCharAfter(parent, "ltr", context.flags).char.isDisjointWith( consumedChars, ) ) } +/** + * Returns whether the given element contains only literal characters and + * groups/other elements containing literal characters. + */ +function containsOnlyLiterals( + element: Element | Pattern | Alternative, +): boolean { + return !hasSomeDescendant( + element, + (d) => { + return ( + d.type === "Backreference" || + d.type === "CharacterSet" || + (d.type === "Quantifier" && d.max === Infinity) || + (d.type === "CharacterClass" && d.negate) + ) + }, + (d) => d.type !== "Assertion", + ) +} + /** * Sorts the given alternatives. */ @@ -105,22 +175,32 @@ function sortAlternatives( * This tries to sort the given alternatives by assuming that all alternatives * are a number. */ -function trySortNumberAlternatives(alternatives: Alternative[]): boolean { - const numbers = new Map() - for (const a of alternatives) { - const { raw } = a - if (/^\d+$/.test(raw)) { - numbers.set(a, Number(raw)) - } else { - return false +function trySortNumberAlternatives(alternatives: Alternative[]): void { + const numberRanges: [number, number][] = [] + { + let start = 0 + for (let i = 0; i < alternatives.length; i++) { + if (!/^0|[1-9]\d*$/.test(alternatives[i].raw)) { + if (start < i) { + numberRanges.push([start, i]) + } + start = i + 1 + } + } + if (start < alternatives.length) { + numberRanges.push([start, alternatives.length]) } } - alternatives.sort((a, b) => { - return numbers.get(a)! - numbers.get(b)! - }) + for (const [start, end] of numberRanges) { + const slice = alternatives.slice(start, end) + + slice.sort((a, b) => { + return Number(a.raw) - Number(b.raw) + }) - return true + alternatives.splice(start, end - start, ...slice) + } } /** @@ -143,6 +223,33 @@ function unionLocations(a: SourceLocation, b: SourceLocation): SourceLocation { } } +/** + * Returns the indexes of the first and last of original array that is changed + * when compared with the reordered one. + */ +function getReorderingBounds( + original: readonly T[], + reorder: readonly T[], +): [number, number] | undefined { + if (original.length !== reorder.length) { + return undefined + } + + const len = original.length + + let first = 0 + for (; first < len && original[first] === reorder[first]; first++); + + if (first === len) { + return undefined + } + + let last = len - 1 + for (; last >= 0 && original[last] === reorder[last]; last--); + + return [first, last] +} + export default createRule("sort-alternatives", { meta: { docs: { @@ -165,31 +272,31 @@ export default createRule("sort-alternatives", { function createVisitor( regexpContext: RegExpContext, ): RegExpVisitor.Handlers { - const { node, getRegexpLocation, fixReplaceNode } = regexpContext + const { + node, + getRegexpLocation, + fixReplaceNode, + flags, + } = regexpContext - /** The handler for groups */ + const allowedChars = getAllowedChars(flags) + + /** + * Creates a report if the sorted alternatives are different from + * the unsorted ones. + */ function enforceSorted(sorted: Alternative[]): void { const parent = sorted[0].parent const unsorted = parent.alternatives - const firstChanged = unsorted.findIndex( - (a, i) => a !== unsorted[i], - ) - - if (firstChanged === -1) { - // unsorted === sorted + const bounds = getReorderingBounds(unsorted, sorted) + if (!bounds) { return } - const lastChanged = [...unsorted] - .reverse() - .findIndex( - (a, i) => a !== unsorted[unsorted.length - 1 - i], - ) - const loc = unionLocations( - getRegexpLocation(unsorted[firstChanged]), - getRegexpLocation(unsorted[lastChanged]), + getRegexpLocation(unsorted[bounds[0]]), + getRegexpLocation(unsorted[bounds[1]]), ) context.report({ @@ -213,34 +320,66 @@ export default createRule("sort-alternatives", { }) } - /** The handler for groups */ - function onGroup(group: Group | CapturingGroup): void { - if (group.alternatives.length < 2) { + /** The handler for parents */ + function onParent(parent: Parent): void { + if (parent.alternatives.length < 2) { + return + } + + if (!containsOnlyLiterals(parent)) { + return + } + + if ( + hasSomeDescendant( + parent, + (d) => d !== parent && d.type === "CapturingGroup", + ) + ) { + // it's not safe to reorder alternatives with capturing groups return } - const consumedChars = getConsumedChars(group, regexpContext) + const consumedChars = getConsumedChars(parent, regexpContext) if (consumedChars.isEmpty) { // all alternatives are either empty or only contain // assertions return } - - if (!canReorder(group, consumedChars, regexpContext)) { + if (!consumedChars.isSubsetOf(allowedChars.allowed)) { + // contains some chars that are not allowed return } + if (consumedChars.isDisjointWith(allowedChars.required)) { + // doesn't contain required chars + return + } + + const alternatives = [...parent.alternatives] - const alternatives = [...group.alternatives] - if (!trySortNumberAlternatives(alternatives)) { + if (canReorder(parent, consumedChars, regexpContext)) { + // alternatives can be reordered freely sortAlternatives(alternatives, regexpContext) + trySortNumberAlternatives(alternatives) + } else if ( + !consumedChars.isDisjointWith(Chars.digit(flags)) && + canReorder( + parent, + consumedChars.intersect(Chars.digit(flags)), + regexpContext, + ) + ) { + // let's try to at least sort numbers + trySortNumberAlternatives(alternatives) } enforceSorted(alternatives) } return { - onGroupEnter: onGroup, - onCapturingGroupEnter: onGroup, + onGroupEnter: onParent, + onPatternEnter: onParent, + onCapturingGroupEnter: onParent, } } diff --git a/tests/lib/rules/sort-alternatives.ts b/tests/lib/rules/sort-alternatives.ts index d34af503c..8b896ba77 100644 --- a/tests/lib/rules/sort-alternatives.ts +++ b/tests/lib/rules/sort-alternatives.ts @@ -9,19 +9,70 @@ const tester = new RuleTester({ }) tester.run("sort-alternatives", rule as any, { - valid: [`/regexp/`], + valid: [String.raw`/c|bb|a/`, String.raw`/\b(?:a|\d+|c|b)\b/`], invalid: [ { - code: `/regexp/`, + code: String.raw`/c|b|a/`, + output: String.raw`/a|b|c/`, errors: [ - { - messageId: "", - data: {}, - line: 1, - column: 1, - endLine: 1, - endColumn: 1, - }, + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, + { + code: String.raw`/\b(?:c|b|a)\b/`, + output: String.raw`/\b(?:a|b|c)\b/`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, + { + code: String.raw`/\b(?:A|a|C|c|B|b)\b/`, + output: String.raw`/\b(?:A|B|C|a|b|c)\b/`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, + { + code: String.raw`/\b(?:A|a|C|c|B|b)\b/i`, + output: String.raw`/\b(?:A|a|B|b|C|c)\b/i`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, + { + code: String.raw`/\b(?:1|2|4|8|16|32|64|128|256|0)\b/`, + output: String.raw`/\b(?:0|1|2|4|8|16|32|64|128|256)\b/`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, + + { + code: String.raw`/\b(?:[Nn]umber|[Ss]tring|[Bb]oolean|Function|any|mixed|null|void)\b/`, + output: String.raw`/\b(?:[Bb]oolean|Function|[Nn]umber|[Ss]tring|any|mixed|null|void)\b/`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, + { + code: String.raw`/_(?:SERVER|GET|POST|FILES|REQUEST|SESSION|ENV|COOKIE)\b/`, + output: String.raw`/_(?:COOKIE|ENV|FILES|GET|POST|REQUEST|SERVER|SESSION)\b/`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, + { + code: String.raw`/\b[ui](?:128|16|32|64|8|size)\b/`, + output: String.raw`/\b[ui](?:8|16|32|64|128|size)\b/`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, + { + code: String.raw`/\((?:TM|R|C)\)/`, + output: String.raw`/\((?:C|R|TM)\)/`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", ], }, ], From af69d32859826ba894d0a7f296eeb91e873f8ad5 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 8 Jun 2021 16:59:40 +0200 Subject: [PATCH 7/9] Allow apostrophes --- lib/rules/sort-alternatives.ts | 10 ++++++++-- lib/utils/unicode.ts | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/rules/sort-alternatives.ts b/lib/rules/sort-alternatives.ts index 97b86c8f9..f1b19bbb0 100644 --- a/lib/rules/sort-alternatives.ts +++ b/lib/rules/sort-alternatives.ts @@ -8,7 +8,13 @@ import type { Pattern, } from "regexpp/ast" import type { RegExpContext } from "../utils" -import { CP_MINUS, CP_SPACE, createRule, defineRegexpVisitor } from "../utils" +import { + CP_MINUS, + CP_SPACE, + CP_APOSTROPHE, + createRule, + defineRegexpVisitor, +} from "../utils" import type { ReadonlyFlags } from "regexp-ast-analysis" import { getLengthRange, @@ -39,7 +45,7 @@ function getAllowedChars(flags: ReadonlyFlags) { { kind: "word", negate: false }, { min: CP_SPACE, max: CP_SPACE }, { min: CP_MINUS, max: CP_MINUS }, - { min: CP_MINUS, max: CP_MINUS }, + { min: CP_APOSTROPHE, max: CP_APOSTROPHE }, ], flags, ), diff --git a/lib/utils/unicode.ts b/lib/utils/unicode.ts index 8de82a455..ee1773e9e 100644 --- a/lib/utils/unicode.ts +++ b/lib/utils/unicode.ts @@ -24,6 +24,7 @@ export const CP_BACK_SLASH = "\\".codePointAt(0)! export const CP_CLOSING_BRACKET = "]".codePointAt(0)! export const CP_CARET = "^".codePointAt(0)! export const CP_BACKTICK = "`".codePointAt(0)! +export const CP_APOSTROPHE = "'".codePointAt(0)! export const CP_OPENING_BRACE = "{".codePointAt(0)! export const CP_PIPE = "|".codePointAt(0)! export const CP_CLOSING_BRACE = "}".codePointAt(0)! From 632ec0e33cd4e2f8369d2035bee3bcd3298d68be Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 8 Jun 2021 17:00:36 +0200 Subject: [PATCH 8/9] Added docs --- docs/rules/sort-alternatives.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/rules/sort-alternatives.md b/docs/rules/sort-alternatives.md index 18bda7d4b..e4f4622ca 100644 --- a/docs/rules/sort-alternatives.md +++ b/docs/rules/sort-alternatives.md @@ -13,7 +13,11 @@ description: "sort alternatives if order doesn't matter" ## :book: Rule Details -This rule reports ???. +This rule will sort alternatives to improve readability and maintainability. + +The primary target of this rule are lists of words and/or numbers. These lists are somewhat common and sorting them makes it easy for readers to check whether a particular word or number is included. + +This rule will only sort alternatives if reordering the alternatives doesn't affect the pattern. @@ -21,10 +25,13 @@ This rule reports ???. /* eslint regexp/sort-alternatives: "error" */ /* ✓ GOOD */ - +var foo = /\b(1|2|3)\b/; +var foo = /\b(alpha|beta|gamma)\b/; /* ✗ BAD */ - +var foo = /\b(2|1|3)\b/; +var foo = /__(?:Foo|Bar)__/; +var foo = /\((?:TM|R|C)\)/; ``` From e45c3c0b3b82f30ae589b2c461c0ed71be49b6b3 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Wed, 9 Jun 2021 10:12:50 +0200 Subject: [PATCH 9/9] Updated compare function --- lib/rules/sort-alternatives.ts | 22 ++++++++++++++++++++-- tests/lib/rules/sort-alternatives.ts | 21 +++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/lib/rules/sort-alternatives.ts b/lib/rules/sort-alternatives.ts index f1b19bbb0..40362f047 100644 --- a/lib/rules/sort-alternatives.ts +++ b/lib/rules/sort-alternatives.ts @@ -145,6 +145,21 @@ function containsOnlyLiterals( ) } +/** + * Compare two string independent of the current locale by byte order. + */ +function compareByteOrder(a: string, b: string): number { + const l = Math.min(a.length, b.length) + for (let i = 0; i < l; i++) { + const diff = a.charCodeAt(i) - b.charCodeAt(i) + if (diff !== 0) { + return diff + } + } + + return a.length - b.length +} + /** * Sorts the given alternatives. */ @@ -170,10 +185,13 @@ function sortAlternatives( } if (context.flags.ignoreCase) { - return a.raw.toUpperCase().localeCompare(b.raw.toUpperCase()) + return ( + compareByteOrder(a.raw.toUpperCase(), b.raw.toUpperCase()) || + compareByteOrder(a.raw, b.raw) + ) } - return a.raw.localeCompare(b.raw) + return compareByteOrder(a.raw, b.raw) }) } diff --git a/tests/lib/rules/sort-alternatives.ts b/tests/lib/rules/sort-alternatives.ts index 8b896ba77..0eb74d932 100644 --- a/tests/lib/rules/sort-alternatives.ts +++ b/tests/lib/rules/sort-alternatives.ts @@ -32,6 +32,13 @@ tester.run("sort-alternatives", rule as any, { "The alternatives of this group can be sorted without affecting the regex.", ], }, + { + code: String.raw`/\b(?:aa|aA|aB|ab)\b/`, + output: String.raw`/\b(?:aA|aB|aa|ab)\b/`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, { code: String.raw`/\b(?:A|a|C|c|B|b)\b/i`, output: String.raw`/\b(?:A|a|B|b|C|c)\b/i`, @@ -39,6 +46,20 @@ tester.run("sort-alternatives", rule as any, { "The alternatives of this group can be sorted without affecting the regex.", ], }, + { + code: String.raw`/\b(?:a|A|c|C|b|B)\b/i`, + output: String.raw`/\b(?:A|a|B|b|C|c)\b/i`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, + { + code: String.raw`/\b(?:aa|aA|aB|ab)\b/i`, + output: String.raw`/\b(?:aA|aa|aB|ab)\b/i`, + errors: [ + "The alternatives of this group can be sorted without affecting the regex.", + ], + }, { code: String.raw`/\b(?:1|2|4|8|16|32|64|128|256|0)\b/`, output: String.raw`/\b(?:0|1|2|4|8|16|32|64|128|256)\b/`,