From 1a4380befbcbf9980c25205472c98ae2c4920e77 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Sat, 6 Apr 2024 19:11:01 +0200 Subject: [PATCH 1/3] Add `regexp/unicode-property` rule --- README.md | 1 + docs/rules/index.md | 1 + docs/rules/unicode-property.md | 241 +++++++ lib/all-rules.ts | 2 + lib/rules/unicode-property.ts | 253 +++++++ lib/utils/unicode-alias.ts | 339 +++++++++ .../unicode-property.ts.eslintsnap | 649 ++++++++++++++++++ tests/lib/rules/unicode-property.ts | 144 ++++ 8 files changed, 1630 insertions(+) create mode 100644 docs/rules/unicode-property.md create mode 100644 lib/rules/unicode-property.ts create mode 100644 lib/utils/unicode-alias.ts create mode 100644 tests/lib/rules/__snapshots__/unicode-property.ts.eslintsnap create mode 100644 tests/lib/rules/unicode-property.ts diff --git a/README.md b/README.md index 2646364da..ee4d080e3 100644 --- a/README.md +++ b/README.md @@ -231,6 +231,7 @@ The `plugin.configs["flat/all"]` / `plugin:regexp/all` config enables all rules. 
| [sort-character-class-elements](https://ota-meshi.github.io/eslint-plugin-regexp/rules/sort-character-class-elements.html) | enforces elements order in character class | | | 🔧 | | | [sort-flags](https://ota-meshi.github.io/eslint-plugin-regexp/rules/sort-flags.html) | require regex flags to be sorted | 🟢 🔵 | | 🔧 | | | [unicode-escape](https://ota-meshi.github.io/eslint-plugin-regexp/rules/unicode-escape.html) | enforce consistent usage of unicode escape or unicode codepoint escape | | | 🔧 | | +| [unicode-property](https://ota-meshi.github.io/eslint-plugin-regexp/rules/unicode-property.html) | enforce consistent naming of unicode properties | | | 🔧 | | diff --git a/docs/rules/index.md b/docs/rules/index.md index 9899dabfc..0d6d1ce72 100644 --- a/docs/rules/index.md +++ b/docs/rules/index.md @@ -108,6 +108,7 @@ sidebarDepth: 0 | [sort-character-class-elements](sort-character-class-elements.md) | enforces elements order in character class | | | 🔧 | | | [sort-flags](sort-flags.md) | require regex flags to be sorted | 🟢 🔵 | | 🔧 | | | [unicode-escape](unicode-escape.md) | enforce consistent usage of unicode escape or unicode codepoint escape | | | 🔧 | | +| [unicode-property](unicode-property.md) | enforce consistent naming of unicode properties | | | 🔧 | | diff --git a/docs/rules/unicode-property.md b/docs/rules/unicode-property.md new file mode 100644 index 000000000..93391aab0 --- /dev/null +++ b/docs/rules/unicode-property.md @@ -0,0 +1,241 @@ +--- +pageClass: "rule-details" +sidebarDepth: 0 +title: "regexp/unicode-property" +description: "enforce consistent naming of unicode properties" +--- +# regexp/unicode-property + +🔧 This rule is automatically fixable by the [`--fix` CLI option](https://eslint.org/docs/latest/user-guide/command-line-interface#--fix). + + + +> enforce consistent naming of unicode properties + +## :book: Rule Details + +This rule helps to enforce consistent style and naming of unicode properties. 
+ +There are many ways a single Unicode property can be expressed. E.g. `\p{L}`, `\p{Letter}`, `\p{gc=L}`, `\p{gc=Letter}`, `\p{General_Category=L}`, and `\p{General_Category=Letter}` are all equivalent. This rule can be configured in a variety of ways to control exactly which ones of those variants are allowed. The default configuration is intended to be a good starting point for most users. + + + +```js +/* eslint regexp/unicode-property: "error" */ + +/* ✓ GOOD */ +var re = /\p{L}/u; +var re = /\p{Letter}/u; +var re = /\p{Script=Greek}/u; +var re = /\p{scx=Greek}/u; +var re = /\p{Hex}/u; +var re = /\p{Hex_Digit}/u; + +/* ✗ BAD */ +var re = /\p{gc=L}/u; +var re = /\p{General_Category=Letter}/u; +var re = /\p{Script=Grek}/u; +``` + + + +## :wrench: Options + +```json +{ + "regexp/unicode-property": ["error", { + "generalCategory": "never", + "key": "ignore", + "property": { + "binary": "ignore", + "generalCategory": "ignore", + "script": "long" + } + }] +} +``` + +### `generalCategory: "never" | "always" | "ignore"` + +Values from the `General_Category` property can be expressed in two ways: either without or with the `gc=` (or `General_Category=`) prefix. E.g. `\p{Letter}` or `\p{gc=Letter}`. + +This option controls whether the `gc=` prefix is required or forbidden. + +- `"never"` (default): The `gc=` (or `General_Category=`) prefix is forbidden. + + + ```js + /* eslint regexp/unicode-property: ["error", { generalCategory: "never" }] */ + + var re = /\p{Letter}/u; + var re = /\p{gc=Letter}/u; + var re = /\p{General_Category=Letter}/u; + ``` + + + +- `"always"`: The `gc=` (or `General_Category=`) prefix is required. + + + ```js + /* eslint regexp/unicode-property: ["error", { generalCategory: "always" }] */ + + var re = /\p{Letter}/u; + var re = /\p{gc=Letter}/u; + var re = /\p{General_Category=Letter}/u; + ``` + + + +- `"ignore"`: Both forms, with and without the prefix, are allowed.
+ + + ```js + /* eslint regexp/unicode-property: ["error", { generalCategory: "ignore" }] */ + + var re = /\p{Letter}/u; + var re = /\p{gc=Letter}/u; + var re = /\p{General_Category=Letter}/u; + ``` + + + +### `key: "short" | "long" | "ignore"` + +Unicode properties in key-value form (e.g. `\p{gc=Letter}`, `\P{scx=Greek}`) have two variants for the key: a short and a long form. E.g. `\p{gc=Letter}` and `\p{General_Category=Letter}`. + +This option controls whether the short or long form is required. + +- `"short"`: The key must be in short form. + + + ```js + /* eslint regexp/unicode-property: ["error", { key: "short", generalCategory: "ignore" }] */ + + var re = /\p{gc=Letter}/u; + var re = /\p{General_Category=Letter}/u; + var re = /\p{sc=Greek}/u; + var re = /\p{Script=Greek}/u; + var re = /\p{scx=Greek}/u; + var re = /\p{Script_Extensions=Greek}/u; + ``` + + + +- `"long"`: The key must be in long form. + + + ```js + /* eslint regexp/unicode-property: ["error", { key: "long", generalCategory: "ignore" }] */ + + var re = /\p{gc=Letter}/u; + var re = /\p{General_Category=Letter}/u; + var re = /\p{sc=Greek}/u; + var re = /\p{Script=Greek}/u; + var re = /\p{scx=Greek}/u; + var re = /\p{Script_Extensions=Greek}/u; + ``` + + + +- `"ignore"` (default): The key can be in either form. + + + ```js + /* eslint regexp/unicode-property: ["error", { key: "ignore", generalCategory: "ignore" }] */ + + var re = /\p{gc=Letter}/u; + var re = /\p{General_Category=Letter}/u; + var re = /\p{sc=Greek}/u; + var re = /\p{Script=Greek}/u; + var re = /\p{scx=Greek}/u; + var re = /\p{Script_Extensions=Greek}/u; + ``` + + + +### `property: "short" | "long" | "ignore" | object` + +Similar to `key`, most property names also have long and short forms. E.g. `\p{Letter}` and `\p{L}`. + +This option controls whether the short or long form is required. Which form is required can be configured for each property type via an object.
The object has to be of the type: + +```ts +{ + binary?: "short" | "long" | "ignore", + generalCategory?: "short" | "long" | "ignore", + script?: "short" | "long" | "ignore", +} +``` + +- `binary` controls the form of Binary Unicode properties. E.g. `ASCII`, `Any`, `Hex`. +- `generalCategory` controls the form of values from the `General_Category` property. E.g. `Letter`, `Ll`, `P`. +- `script` controls the form of values from the `Script` and `Script_Extensions` properties. E.g. `Greek`. + +If the option is set to a string instead of an object, it will be used for all property types. + +#### Examples + +All set to `"long"`: + + + +```js +/* eslint regexp/unicode-property: ["error", { property: "long" }] */ + +var re = /\p{Hex}/u; +var re = /\p{Hex_Digit}/u; +var re = /\p{L}/u; +var re = /\p{Letter}/u; +var re = /\p{sc=Grek}/u; +var re = /\p{sc=Greek}/u; +``` + + + +All set to `"short"`: + + + +```js +/* eslint regexp/unicode-property: ["error", { property: "short" }] */ + +var re = /\p{Hex}/u; +var re = /\p{Hex_Digit}/u; +var re = /\p{L}/u; +var re = /\p{Letter}/u; +var re = /\p{sc=Grek}/u; +var re = /\p{sc=Greek}/u; +``` + + + +Binary properties and values of the `General_Category` property set to `"short"` and values of the `Script` property set to `"long"`: + + + +```js +/* eslint regexp/unicode-property: ["error", { property: { binary: "short", generalCategory: "short", script: "long" } }] */ + +var re = /\p{Hex}/u; +var re = /\p{Hex_Digit}/u; +var re = /\p{L}/u; +var re = /\p{Letter}/u; +var re = /\p{sc=Grek}/u; +var re = /\p{sc=Greek}/u; +``` + + + +## :books: Further reading + +- [MDN docs on Unicode property escapes](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Unicode_character_class_escape) + +## :rocket: Version + +:exclamation: ***This rule has not been released yet.*** + +## :mag: Implementation + +- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/unicode-property.ts) +- 
[Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/unicode-property.ts) diff --git a/lib/all-rules.ts b/lib/all-rules.ts index a0053e897..eea18f079 100644 --- a/lib/all-rules.ts +++ b/lib/all-rules.ts @@ -78,6 +78,7 @@ import sortCharacterClassElements from "./rules/sort-character-class-elements" import sortFlags from "./rules/sort-flags" import strict from "./rules/strict" import unicodeEscape from "./rules/unicode-escape" +import unicodeProperty from "./rules/unicode-property" import useIgnoreCase from "./rules/use-ignore-case" import type { RuleModule } from "./types" @@ -162,5 +163,6 @@ export const rules: RuleModule[] = [ sortFlags, strict, unicodeEscape, + unicodeProperty, useIgnoreCase, ] diff --git a/lib/rules/unicode-property.ts b/lib/rules/unicode-property.ts new file mode 100644 index 000000000..2691df715 --- /dev/null +++ b/lib/rules/unicode-property.ts @@ -0,0 +1,253 @@ +import type { UnicodePropertyCharacterSet } from "@eslint-community/regexpp/ast" +import type { RegExpVisitor } from "@eslint-community/regexpp/visitor" +import type { RegExpContext } from "../utils" +import { createRule, defineRegexpVisitor } from "../utils" +import { + UNICODE_BINARY_PROPERTY_ALIAS, + UNICODE_CATEGORY_ALIAS, + UNICODE_GENERAL_CATEGORY_ALIAS, + UNICODE_SCRIPT_ALIAS, +} from "../utils/unicode-alias" + +function isGeneralCategory(key: string): boolean { + return UNICODE_CATEGORY_ALIAS.toShort(key) === "gc" +} + +export default createRule("unicode-property", { + meta: { + docs: { + description: "enforce consistent naming of unicode properties", + category: "Stylistic Issues", + recommended: false, + }, + schema: [ + { + type: "object", + properties: { + generalCategory: { + enum: ["always", "never", "ignore"], + }, + key: { + enum: ["short", "long", "ignore"], + }, + property: { + anyOf: [ + { + enum: ["short", "long", "ignore"], + }, + { + type: "object", + properties: { + binary: { + enum: ["short", "long", "ignore"], + }, + 
generalCategory: { + enum: ["short", "long", "ignore"], + }, + script: { + enum: ["short", "long", "ignore"], + }, + }, + additionalProperties: false, + }, + ], + }, + }, + additionalProperties: false, + }, + ], + messages: { + unnecessaryGc: "Unnecessary '{{ gc }}=' in Unicode property.", + missingGc: "Missing '{{ gc }}=' in Unicode property.", + expectedKey: "Excepted {{ len }} key. Use '{{ key }}' instead.", + expectedProperty: + "Excepted {{ len }} {{ type }} property. Use '{{ prop }}' instead.", + }, + type: "suggestion", + fixable: "code", + }, + create(context) { + type Length = "short" | "long" | "ignore" + interface Options { + generalCategory?: "always" | "never" | "ignore" + key?: Length + property?: + | Length + | { binary?: Length; generalCategory?: Length; script?: Length } + } + const { + generalCategory = "never", + key: keyFormat = "ignore", + property = { + binary: "ignore", + generalCategory: "ignore", + script: "long", + }, + } = (context.options[0] || {}) as Options + + let defaultPropertyFormat: Length = "long" + if (typeof property === "string") { + defaultPropertyFormat = property + } + const { + binary: binaryFormat = defaultPropertyFormat, + generalCategory: generalCategoryFormat = defaultPropertyFormat, + script: scriptFormat = defaultPropertyFormat, + } = typeof property === "string" ? {} : property + + function createVisitor( + regexpContext: RegExpContext, + ): RegExpVisitor.Handlers { + const { node, getRegexpLocation, fixReplaceNode } = regexpContext + + function onUnicodeProperty(cs: UnicodePropertyCharacterSet): void { + // Whether the unicode property is in the form of \p{key} or \p{key=value}. + // We need to check this separately because regexpp automatically parses + // \p{L} as \p{General_Category=L} for all General_Category values. 
+ const keyValueSyntax = cs.raw.includes("=") + + function fixReplace(inner: string) { + return fixReplaceNode(cs, `${cs.raw.slice(0, 2)}{${inner}}`) + } + + function getKeyLocation() { + const offset = "\\p{".length + if (keyValueSyntax) { + return getRegexpLocation({ + start: cs.start + offset, + end: cs.start + offset + cs.key.length, + }) + } + return getRegexpLocation({ + start: cs.start + offset, + end: cs.end - 1, + }) + } + + function getValueLocation() { + return getRegexpLocation({ + start: cs.end - 1 - (cs.value || cs.key).length, + end: cs.end - 1, + }) + } + + const { key, value } = cs + + if (value === null) { + // format: \p{key} + if (binaryFormat !== "ignore") { + const expected = + binaryFormat === "short" + ? UNICODE_BINARY_PROPERTY_ALIAS.toShort(key) + : UNICODE_BINARY_PROPERTY_ALIAS.toLong(key) + + if (key !== expected) { + context.report({ + node, + loc: getKeyLocation(), + messageId: "expectedProperty", + data: { + len: binaryFormat, + type: "binary", + prop: expected, + }, + fix: fixReplace(expected), + }) + } + } + } else { + // format: \p{key=value} + const isGC = isGeneralCategory(key) + let handledKey = false + if (isGC) { + if (keyValueSyntax && generalCategory === "never") { + context.report({ + node, + loc: getKeyLocation(), + messageId: "unnecessaryGc", + data: { gc: key }, + fix: fixReplace(value), + }) + handledKey = true + } + if (!keyValueSyntax && generalCategory === "always") { + const missing = + keyFormat === "long" ? "General_Category" : "gc" + context.report({ + node, + loc: getRegexpLocation(cs), + messageId: "missingGc", + data: { gc: missing }, + fix: fixReplace(`${missing}=${value}`), + }) + handledKey = true + } + } + + if ( + !handledKey && + keyValueSyntax && + keyFormat !== "ignore" + ) { + const expected = + keyFormat === "short" + ? 
UNICODE_CATEGORY_ALIAS.toShort(key) + : UNICODE_CATEGORY_ALIAS.toLong(key) + + if (key !== expected) { + context.report({ + node, + loc: getKeyLocation(), + messageId: "expectedKey", + data: { len: keyFormat, key: expected }, + fix: fixReplace(`${expected}=${value}`), + }) + } + } + + const valueFormat = isGC + ? generalCategoryFormat + : scriptFormat + if (valueFormat !== "ignore") { + const aliasMap = isGC + ? UNICODE_GENERAL_CATEGORY_ALIAS + : UNICODE_SCRIPT_ALIAS + const expected = + valueFormat === "short" + ? aliasMap.toShort(value) + : aliasMap.toLong(value) + + if (value !== expected) { + const prefix = keyValueSyntax ? `${key}=` : "" + const type = isGC ? "General_Category" : "Script" + + context.report({ + node, + loc: getValueLocation(), + messageId: "expectedProperty", + data: { + len: valueFormat, + type, + prop: expected, + }, + fix: fixReplace(`${prefix}${expected}`), + }) + } + } + } + } + + return { + onCharacterSetEnter(cs) { + if (cs.kind === "property") { + onUnicodeProperty(cs) + } + }, + } + } + + return defineRegexpVisitor(context, { + createVisitor, + }) + }, +}) diff --git a/lib/utils/unicode-alias.ts b/lib/utils/unicode-alias.ts new file mode 100644 index 000000000..13f34baf7 --- /dev/null +++ b/lib/utils/unicode-alias.ts @@ -0,0 +1,339 @@ +export class AliasMap { + private readonly toShortMap: ReadonlyMap + + private readonly toLongMap: ReadonlyMap + + public constructor({ + shortToLong, + otherToLong, + }: { + shortToLong: Record + otherToLong: Record + }) { + const toShortMap = new Map() + const toLongMap = new Map() + + for (const [short, long] of Object.entries(shortToLong)) { + toShortMap.set(long, short) + toLongMap.set(short, long) + } + for (const [other, long] of Object.entries(otherToLong)) { + toLongMap.set(other, long) + const short = toShortMap.get(long) + if (!short) { + throw new Error( + `No short key for ${long} with other key ${other}`, + ) + } + toShortMap.set(other, short) + } + + this.toShortMap = toShortMap + 
this.toLongMap = toLongMap + } + + public toShort(key: string): string { + return this.toShortMap.get(key) || key + } + + public toLong(key: string): string { + return this.toLongMap.get(key) || key + } +} + +export const UNICODE_CATEGORY_ALIAS = new AliasMap({ + shortToLong: { + gc: "General_Category", + sc: "Script", + scx: "Script_Extensions", + }, + otherToLong: {}, +}) + +// https://unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt +export const UNICODE_BINARY_PROPERTY_ALIAS = new AliasMap({ + shortToLong: { + AHex: "ASCII_Hex_Digit", + Alpha: "Alphabetic", + Bidi_C: "Bidi_Control", + Bidi_M: "Bidi_Mirrored", + CE: "Composition_Exclusion", + CI: "Case_Ignorable", + Comp_Ex: "Full_Composition_Exclusion", + CWCF: "Changes_When_Casefolded", + CWCM: "Changes_When_Casemapped", + CWKCF: "Changes_When_NFKC_Casefolded", + CWL: "Changes_When_Lowercased", + CWT: "Changes_When_Titlecased", + CWU: "Changes_When_Uppercased", + Dep: "Deprecated", + DI: "Default_Ignorable_Code_Point", + Dia: "Diacritic", + EBase: "Emoji_Modifier_Base", + EComp: "Emoji_Component", + EMod: "Emoji_Modifier", + EPres: "Emoji_Presentation", + Ext: "Extender", + ExtPict: "Extended_Pictographic", + Gr_Base: "Grapheme_Base", + Gr_Ext: "Grapheme_Extend", + Gr_Link: "Grapheme_Link", + Hex: "Hex_Digit", + IDC: "ID_Continue", + Ideo: "Ideographic", + IDS: "ID_Start", + IDSB: "IDS_Binary_Operator", + IDST: "IDS_Trinary_Operator", + IDSU: "IDS_Unary_Operator", + Join_C: "Join_Control", + LOE: "Logical_Order_Exception", + Lower: "Lowercase", + NChar: "Noncharacter_Code_Point", + OAlpha: "Other_Alphabetic", + ODI: "Other_Default_Ignorable_Code_Point", + OGr_Ext: "Other_Grapheme_Extend", + OIDC: "Other_ID_Continue", + OIDS: "Other_ID_Start", + OLower: "Other_Lowercase", + OMath: "Other_Math", + OUpper: "Other_Uppercase", + Pat_Syn: "Pattern_Syntax", + Pat_WS: "Pattern_White_Space", + PCM: "Prepended_Concatenation_Mark", + QMark: "Quotation_Mark", + RI: "Regional_Indicator", + SD: "Soft_Dotted", + STerm: 
"Sentence_Terminal", + Term: "Terminal_Punctuation", + UIdeo: "Unified_Ideograph", + Upper: "Uppercase", + VS: "Variation_Selector", + WSpace: "White_Space", + XIDC: "XID_Continue", + XIDS: "XID_Start", + XO_NFC: "Expands_On_NFC", + XO_NFD: "Expands_On_NFD", + XO_NFKC: "Expands_On_NFKC", + XO_NFKD: "Expands_On_NFKD", + }, + otherToLong: { + space: "White_Space", + }, +}) + +// https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt +export const UNICODE_GENERAL_CATEGORY_ALIAS = new AliasMap({ + shortToLong: { + C: "Other", + Cc: "Control", + Cf: "Format", + Cn: "Unassigned", + Co: "Private_Use", + Cs: "Surrogate", + L: "Letter", + LC: "Cased_Letter", + Ll: "Lowercase_Letter", + Lm: "Modifier_Letter", + Lo: "Other_Letter", + Lt: "Titlecase_Letter", + Lu: "Uppercase_Letter", + M: "Mark", + Mc: "Spacing_Mark", + Me: "Enclosing_Mark", + Mn: "Nonspacing_Mark", + N: "Number", + Nd: "Decimal_Number", + Nl: "Letter_Number", + No: "Other_Number", + P: "Punctuation", + Pc: "Connector_Punctuation", + Pd: "Dash_Punctuation", + Pe: "Close_Punctuation", + Pf: "Final_Punctuation", + Pi: "Initial_Punctuation", + Po: "Other_Punctuation", + Ps: "Open_Punctuation", + S: "Symbol", + Sc: "Currency_Symbol", + Sk: "Modifier_Symbol", + Sm: "Math_Symbol", + So: "Other_Symbol", + Z: "Separator", + Zl: "Line_Separator", + Zp: "Paragraph_Separator", + Zs: "Space_Separator", + }, + otherToLong: { + cntrl: "Control", + Combining_Mark: "Mark", + digit: "Decimal_Number", + punct: "Punctuation", + }, +}) + +// https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt +export const UNICODE_SCRIPT_ALIAS = new AliasMap({ + shortToLong: { + Adlm: "Adlam", + Aghb: "Caucasian_Albanian", + Arab: "Arabic", + Armi: "Imperial_Aramaic", + Armn: "Armenian", + Avst: "Avestan", + Bali: "Balinese", + Bamu: "Bamum", + Bass: "Bassa_Vah", + Batk: "Batak", + Beng: "Bengali", + Bhks: "Bhaiksuki", + Bopo: "Bopomofo", + Brah: "Brahmi", + Brai: "Braille", + Bugi: "Buginese", + Buhd: "Buhid", + Cakm: 
"Chakma", + Cans: "Canadian_Aboriginal", + Cari: "Carian", + Cher: "Cherokee", + Chrs: "Chorasmian", + Copt: "Coptic", + Cpmn: "Cypro_Minoan", + Cprt: "Cypriot", + Cyrl: "Cyrillic", + Deva: "Devanagari", + Diak: "Dives_Akuru", + Dogr: "Dogra", + Dsrt: "Deseret", + Dupl: "Duployan", + Egyp: "Egyptian_Hieroglyphs", + Elba: "Elbasan", + Elym: "Elymaic", + Ethi: "Ethiopic", + Geor: "Georgian", + Glag: "Glagolitic", + Gong: "Gunjala_Gondi", + Gonm: "Masaram_Gondi", + Goth: "Gothic", + Gran: "Grantha", + Grek: "Greek", + Gujr: "Gujarati", + Guru: "Gurmukhi", + Hang: "Hangul", + Hani: "Han", + Hano: "Hanunoo", + Hatr: "Hatran", + Hebr: "Hebrew", + Hira: "Hiragana", + Hluw: "Anatolian_Hieroglyphs", + Hmng: "Pahawh_Hmong", + Hmnp: "Nyiakeng_Puachue_Hmong", + Hrkt: "Katakana_Or_Hiragana", + Hung: "Old_Hungarian", + Ital: "Old_Italic", + Java: "Javanese", + Kali: "Kayah_Li", + Kana: "Katakana", + Khar: "Kharoshthi", + Khmr: "Khmer", + Khoj: "Khojki", + Kits: "Khitan_Small_Script", + Knda: "Kannada", + Kthi: "Kaithi", + Lana: "Tai_Tham", + Laoo: "Lao", + Latn: "Latin", + Lepc: "Lepcha", + Limb: "Limbu", + Lina: "Linear_A", + Linb: "Linear_B", + Lyci: "Lycian", + Lydi: "Lydian", + Mahj: "Mahajani", + Maka: "Makasar", + Mand: "Mandaic", + Mani: "Manichaean", + Marc: "Marchen", + Medf: "Medefaidrin", + Mend: "Mende_Kikakui", + Merc: "Meroitic_Cursive", + Mero: "Meroitic_Hieroglyphs", + Mlym: "Malayalam", + Mong: "Mongolian", + Mroo: "Mro", + Mtei: "Meetei_Mayek", + Mult: "Multani", + Mymr: "Myanmar", + Nagm: "Nag_Mundari", + Nand: "Nandinagari", + Narb: "Old_North_Arabian", + Nbat: "Nabataean", + Nkoo: "Nko", + Nshu: "Nushu", + Ogam: "Ogham", + Olck: "Ol_Chiki", + Orkh: "Old_Turkic", + Orya: "Oriya", + Osge: "Osage", + Osma: "Osmanya", + Ougr: "Old_Uyghur", + Palm: "Palmyrene", + Pauc: "Pau_Cin_Hau", + Perm: "Old_Permic", + Phag: "Phags_Pa", + Phli: "Inscriptional_Pahlavi", + Phlp: "Psalter_Pahlavi", + Phnx: "Phoenician", + Plrd: "Miao", + Prti: "Inscriptional_Parthian", + Rjng: 
"Rejang", + Rohg: "Hanifi_Rohingya", + Runr: "Runic", + Samr: "Samaritan", + Sarb: "Old_South_Arabian", + Saur: "Saurashtra", + Sgnw: "SignWriting", + Shaw: "Shavian", + Shrd: "Sharada", + Sidd: "Siddham", + Sind: "Khudawadi", + Sinh: "Sinhala", + Sogd: "Sogdian", + Sogo: "Old_Sogdian", + Sora: "Sora_Sompeng", + Soyo: "Soyombo", + Sund: "Sundanese", + Sylo: "Syloti_Nagri", + Syrc: "Syriac", + Tagb: "Tagbanwa", + Takr: "Takri", + Tale: "Tai_Le", + Talu: "New_Tai_Lue", + Taml: "Tamil", + Tang: "Tangut", + Tavt: "Tai_Viet", + Telu: "Telugu", + Tfng: "Tifinagh", + Tglg: "Tagalog", + Thaa: "Thaana", + Tibt: "Tibetan", + Tirh: "Tirhuta", + Tnsa: "Tangsa", + Ugar: "Ugaritic", + Vaii: "Vai", + Vith: "Vithkuqi", + Wara: "Warang_Citi", + Wcho: "Wancho", + Xpeo: "Old_Persian", + Xsux: "Cuneiform", + Yezi: "Yezidi", + Yiii: "Yi", + Zanb: "Zanabazar_Square", + Zinh: "Inherited", + Zyyy: "Common", + Zzzz: "Unknown", + }, + otherToLong: { + Qaac: "Coptic", + Qaai: "Inherited", + }, +}) diff --git a/tests/lib/rules/__snapshots__/unicode-property.ts.eslintsnap b/tests/lib/rules/__snapshots__/unicode-property.ts.eslintsnap new file mode 100644 index 000000000..fdd23ddf4 --- /dev/null +++ b/tests/lib/rules/__snapshots__/unicode-property.ts.eslintsnap @@ -0,0 +1,649 @@ +# eslint-snapshot-rule-tester format: v1 + + +Test: unicode-property >> invalid >>> test default configuration +Code: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex_Digit}/u; + 3 | /\P{Hex}/u; + 4 | + 5 | /\p{L}/u; + 6 | /\p{Letter}/u; + 7 | /\p{gc=L}/u; + | ^~ [1] + 8 | /\p{gc=Letter}/u; + | ^~ [2] + 9 | /\p{General_Category=L}/u; + | ^~~~~~~~~~~~~~~~ [3] + 10 | /\p{General_Category=Letter}/u; + | ^~~~~~~~~~~~~~~~ [4] + 11 | + 12 | /\p{sc=Grek}/u; + | ^~~~ [5] + 13 | /\p{sc=Greek}/u; + 14 | /\p{Script=Grek}/u; + | ^~~~ [6] + 15 | /\p{Script=Greek}/u; + 16 | + 17 | /\p{scx=Grek}/u; + | ^~~~ [7] + 18 | /\p{scx=Greek}/u; + 19 | /\p{Script_Extensions=Grek}/u; + | ^~~~ [8] + 20 | /\p{Script_Extensions=Greek}/u; + 21 | + 22 | // Binary 
Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Control} \p{Cc} \p{cntrl}/u; + 28 | /\p{Mark} \p{M} \p{Combining_Mark}/u; + 29 | /\p{Decimal_Number} \p{Nd} \p{digit}/u; + 30 | /\p{Punctuation} \p{P} \p{punct}/u; + +Output: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex_Digit}/u; + 3 | /\P{Hex}/u; + 4 | + 5 | /\p{L}/u; + 6 | /\p{Letter}/u; + 7 | /\p{L}/u; + 8 | /\p{Letter}/u; + 9 | /\p{L}/u; + 10 | /\p{Letter}/u; + 11 | + 12 | /\p{sc=Greek}/u; + 13 | /\p{sc=Greek}/u; + 14 | /\p{Script=Greek}/u; + 15 | /\p{Script=Greek}/u; + 16 | + 17 | /\p{scx=Greek}/u; + 18 | /\p{scx=Greek}/u; + 19 | /\p{Script_Extensions=Greek}/u; + 20 | /\p{Script_Extensions=Greek}/u; + 21 | + 22 | // Binary Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Control} \p{Cc} \p{cntrl}/u; + 28 | /\p{Mark} \p{M} \p{Combining_Mark}/u; + 29 | /\p{Decimal_Number} \p{Nd} \p{digit}/u; + 30 | /\p{Punctuation} \p{P} \p{punct}/u; + +[1] Unnecessary 'gc=' in Unicode property. +[2] Unnecessary 'gc=' in Unicode property. +[3] Unnecessary 'General_Category=' in Unicode property. +[4] Unnecessary 'General_Category=' in Unicode property. +[5] Excepted long Script property. Use 'Greek' instead. +[6] Excepted long Script property. Use 'Greek' instead. +[7] Excepted long Script property. Use 'Greek' instead. +[8] Excepted long Script property. Use 'Greek' instead. 
+--- + + +Test: unicode-property >> invalid +Options: + - generalCategory: always + key: ignore + property: ignore + +Code: + 1 | /\p{L}/u; + | ^~~~~ [1] + 2 | /\p{Letter}/u; + | ^~~~~~~~~~ [2] + 3 | /\p{gc=L}/u; + 4 | /\p{gc=Letter}/u; + 5 | /\p{General_Category=L}/u; + 6 | /\p{General_Category=Letter}/u; + +Output: + 1 | /\p{gc=L}/u; + 2 | /\p{gc=Letter}/u; + 3 | /\p{gc=L}/u; + 4 | /\p{gc=Letter}/u; + 5 | /\p{General_Category=L}/u; + 6 | /\p{General_Category=Letter}/u; + +[1] Missing 'gc=' in Unicode property. +[2] Missing 'gc=' in Unicode property. +--- + + +Test: unicode-property >> invalid >>> "/\\p{L}/u;\n/\\p{Letter}/u;\n/\\p{gc=L}/u;\n/\\p{gc=Letter}/u;\n/\\p{General_Category=L}/u;\n/\\p{General_Category=Letter}/u; 2" +Options: + - generalCategory: never + key: ignore + property: ignore + +Code: + 1 | /\p{L}/u; + 2 | /\p{Letter}/u; + 3 | /\p{gc=L}/u; + | ^~ [1] + 4 | /\p{gc=Letter}/u; + | ^~ [2] + 5 | /\p{General_Category=L}/u; + | ^~~~~~~~~~~~~~~~ [3] + 6 | /\p{General_Category=Letter}/u; + | ^~~~~~~~~~~~~~~~ [4] + +Output: + 1 | /\p{L}/u; + 2 | /\p{Letter}/u; + 3 | /\p{L}/u; + 4 | /\p{Letter}/u; + 5 | /\p{L}/u; + 6 | /\p{Letter}/u; + +[1] Unnecessary 'gc=' in Unicode property. +[2] Unnecessary 'gc=' in Unicode property. +[3] Unnecessary 'General_Category=' in Unicode property. +[4] Unnecessary 'General_Category=' in Unicode property. 
+--- + + +Test: unicode-property >> invalid +Options: + - generalCategory: ignore + key: long + property: ignore + +Code: + 1 | /\p{L}/u; + 2 | /\p{Letter}/u; + 3 | /\p{gc=L}/u; + | ^~ [1] + 4 | /\p{gc=Letter}/u; + | ^~ [2] + 5 | /\p{General_Category=L}/u; + 6 | /\p{General_Category=Letter}/u; + 7 | + 8 | /\p{sc=Grek}/u; + | ^~ [3] + 9 | /\p{sc=Greek}/u; + | ^~ [4] + 10 | /\p{Script=Grek}/u; + 11 | /\p{Script=Greek}/u; + 12 | + 13 | /\p{scx=Grek}/u; + | ^~~ [5] + 14 | /\p{scx=Greek}/u; + | ^~~ [6] + 15 | /\p{Script_Extensions=Grek}/u; + 16 | /\p{Script_Extensions=Greek}/u; + +Output: + 1 | /\p{L}/u; + 2 | /\p{Letter}/u; + 3 | /\p{General_Category=L}/u; + 4 | /\p{General_Category=Letter}/u; + 5 | /\p{General_Category=L}/u; + 6 | /\p{General_Category=Letter}/u; + 7 | + 8 | /\p{Script=Grek}/u; + 9 | /\p{Script=Greek}/u; + 10 | /\p{Script=Grek}/u; + 11 | /\p{Script=Greek}/u; + 12 | + 13 | /\p{Script_Extensions=Grek}/u; + 14 | /\p{Script_Extensions=Greek}/u; + 15 | /\p{Script_Extensions=Grek}/u; + 16 | /\p{Script_Extensions=Greek}/u; + +[1] Excepted long key. Use 'General_Category' instead. +[2] Excepted long key. Use 'General_Category' instead. +[3] Excepted long key. Use 'Script' instead. +[4] Excepted long key. Use 'Script' instead. +[5] Excepted long key. Use 'Script_Extensions' instead. +[6] Excepted long key. Use 'Script_Extensions' instead. 
+--- + + +Test: unicode-property >> invalid >>> "/\\p{L}/u;\n/\\p{Letter}/u;\n/\\p{gc=L}/u;\n/\\p{gc=Letter}/u;\n/\\p{General_Category=L}/u;\n/\\p{General_Category=Letter}/u;\n\n/\\p{sc=Grek}/u;\n/\\p{sc=Greek}/u;\n/\\p{Script=Grek}/u;\n/\\p{Script=Greek}/u;\n\n/\\p{scx=Grek}/u;\n/\\p{scx=Greek}/u;\n/\\p{Script_Extensions=Grek}/u;\n/\\p{Script_Extensions=Greek}/u; 2" +Options: + - generalCategory: ignore + key: short + property: ignore + +Code: + 1 | /\p{L}/u; + 2 | /\p{Letter}/u; + 3 | /\p{gc=L}/u; + 4 | /\p{gc=Letter}/u; + 5 | /\p{General_Category=L}/u; + | ^~~~~~~~~~~~~~~~ [1] + 6 | /\p{General_Category=Letter}/u; + | ^~~~~~~~~~~~~~~~ [2] + 7 | + 8 | /\p{sc=Grek}/u; + 9 | /\p{sc=Greek}/u; + 10 | /\p{Script=Grek}/u; + | ^~~~~~ [3] + 11 | /\p{Script=Greek}/u; + | ^~~~~~ [4] + 12 | + 13 | /\p{scx=Grek}/u; + 14 | /\p{scx=Greek}/u; + 15 | /\p{Script_Extensions=Grek}/u; + | ^~~~~~~~~~~~~~~~~ [5] + 16 | /\p{Script_Extensions=Greek}/u; + | ^~~~~~~~~~~~~~~~~ [6] + +Output: + 1 | /\p{L}/u; + 2 | /\p{Letter}/u; + 3 | /\p{gc=L}/u; + 4 | /\p{gc=Letter}/u; + 5 | /\p{gc=L}/u; + 6 | /\p{gc=Letter}/u; + 7 | + 8 | /\p{sc=Grek}/u; + 9 | /\p{sc=Greek}/u; + 10 | /\p{sc=Grek}/u; + 11 | /\p{sc=Greek}/u; + 12 | + 13 | /\p{scx=Grek}/u; + 14 | /\p{scx=Greek}/u; + 15 | /\p{scx=Grek}/u; + 16 | /\p{scx=Greek}/u; + +[1] Excepted short key. Use 'gc' instead. +[2] Excepted short key. Use 'gc' instead. +[3] Excepted short key. Use 'sc' instead. +[4] Excepted short key. Use 'sc' instead. +[5] Excepted short key. Use 'scx' instead. +[6] Excepted short key. Use 'scx' instead. 
+--- + + +Test: unicode-property >> invalid +Options: + - generalCategory: ignore + key: ignore + property: long + +Code: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex_Digit}/u; + 3 | /\P{Hex}/u; + | ^~~ [1] + 4 | + 5 | /\p{L}/u; + | ^ [2] + 6 | /\p{Letter}/u; + 7 | /\p{gc=L}/u; + | ^ [3] + 8 | /\p{gc=Letter}/u; + 9 | /\p{General_Category=L}/u; + | ^ [4] + 10 | /\p{General_Category=Letter}/u; + 11 | + 12 | /\p{sc=Grek}/u; + | ^~~~ [5] + 13 | /\p{sc=Greek}/u; + 14 | /\p{Script=Grek}/u; + | ^~~~ [6] + 15 | /\p{Script=Greek}/u; + 16 | + 17 | /\p{scx=Grek}/u; + | ^~~~ [7] + 18 | /\p{scx=Greek}/u; + 19 | /\p{Script_Extensions=Grek}/u; + | ^~~~ [8] + 20 | /\p{Script_Extensions=Greek}/u; + 21 | + 22 | // Binary Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Control} \p{Cc} \p{cntrl}/u; + | ^~ [9] ^~~~~ [10] + 28 | /\p{Mark} \p{M} \p{Combining_Mark}/u; + | ^ ^~~~~~~~~~~~~~ + | [11] [12] + 29 | /\p{Decimal_Number} \p{Nd} \p{digit}/u; + | ^~ ^~~~~ + | [13] [14] + 30 | /\p{Punctuation} \p{P} \p{punct}/u; + | ^ ^~~~~ + | [15] [16] + +Output: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex_Digit}/u; + 3 | /\P{Hex_Digit}/u; + 4 | + 5 | /\p{Letter}/u; + 6 | /\p{Letter}/u; + 7 | /\p{gc=Letter}/u; + 8 | /\p{gc=Letter}/u; + 9 | /\p{General_Category=Letter}/u; + 10 | /\p{General_Category=Letter}/u; + 11 | + 12 | /\p{sc=Greek}/u; + 13 | /\p{sc=Greek}/u; + 14 | /\p{Script=Greek}/u; + 15 | /\p{Script=Greek}/u; + 16 | + 17 | /\p{scx=Greek}/u; + 18 | /\p{scx=Greek}/u; + 19 | /\p{Script_Extensions=Greek}/u; + 20 | /\p{Script_Extensions=Greek}/u; + 21 | + 22 | // Binary Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Control} \p{Control} \p{Control}/u; + 28 | /\p{Mark} \p{Mark} \p{Mark}/u; + 29 | /\p{Decimal_Number} \p{Decimal_Number} \p{Decimal_Number}/u; + 30 | /\p{Punctuation} 
\p{Punctuation} \p{Punctuation}/u; + +[1] Excepted long binary property. Use 'Hex_Digit' instead. +[2] Excepted long General_Category property. Use 'Letter' instead. +[3] Excepted long General_Category property. Use 'Letter' instead. +[4] Excepted long General_Category property. Use 'Letter' instead. +[5] Excepted long Script property. Use 'Greek' instead. +[6] Excepted long Script property. Use 'Greek' instead. +[7] Excepted long Script property. Use 'Greek' instead. +[8] Excepted long Script property. Use 'Greek' instead. +[9] Excepted long General_Category property. Use 'Control' instead. +[10] Excepted long General_Category property. Use 'Control' instead. +[11] Excepted long General_Category property. Use 'Mark' instead. +[12] Excepted long General_Category property. Use 'Mark' instead. +[13] Excepted long General_Category property. Use 'Decimal_Number' instead. +[14] Excepted long General_Category property. Use 'Decimal_Number' instead. +[15] Excepted long General_Category property. Use 'Punctuation' instead. +[16] Excepted long General_Category property. Use 'Punctuation' instead. 
+--- + + +Test: unicode-property >> invalid >>> "/\\P{ASCII}/u;\n/\\P{Hex_Digit}/u;\n/\\P{Hex}/u;\n\n/\\p{L}/u;\n/\\p{Letter}/u;\n/\\p{gc=L}/u;\n/\\p{gc=Letter}/u;\n/\\p{General_Category=L}/u;\n/\\p{General_Category=Letter}/u;\n\n/\\p{sc=Grek}/u;\n/\\p{sc=Greek}/u;\n/\\p{Script=Grek}/u;\n/\\p{Script=Greek}/u;\n\n/\\p{scx=Grek}/u;\n/\\p{scx=Greek}/u;\n/\\p{Script_Extensions=Grek}/u;\n/\\p{Script_Extensions=Greek}/u;\n\n// Binary Properties\n// https://github.com/tc39/ecma262/issues/3286\n// /\\p{White_Space} \\p{space} \\p{WSpace}/u;\n\n// General_Category\n/\\p{Control} \\p{Cc} \\p{cntrl}/u;\n/\\p{Mark} \\p{M} \\p{Combining_Mark}/u;\n/\\p{Decimal_Number} \\p{Nd} \\p{digit}/u;\n/\\p{Punctuation} \\p{P} \\p{punct}/u; 2" +Options: + - generalCategory: ignore + key: ignore + property: short + +Code: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex_Digit}/u; + | ^~~~~~~~~ [1] + 3 | /\P{Hex}/u; + 4 | + 5 | /\p{L}/u; + 6 | /\p{Letter}/u; + | ^~~~~~ [2] + 7 | /\p{gc=L}/u; + 8 | /\p{gc=Letter}/u; + | ^~~~~~ [3] + 9 | /\p{General_Category=L}/u; + 10 | /\p{General_Category=Letter}/u; + | ^~~~~~ [4] + 11 | + 12 | /\p{sc=Grek}/u; + 13 | /\p{sc=Greek}/u; + | ^~~~~ [5] + 14 | /\p{Script=Grek}/u; + 15 | /\p{Script=Greek}/u; + | ^~~~~ [6] + 16 | + 17 | /\p{scx=Grek}/u; + 18 | /\p{scx=Greek}/u; + | ^~~~~ [7] + 19 | /\p{Script_Extensions=Grek}/u; + 20 | /\p{Script_Extensions=Greek}/u; + | ^~~~~ [8] + 21 | + 22 | // Binary Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Control} \p{Cc} \p{cntrl}/u; + | ^~~~~~~ [9] ^~~~~ [10] + 28 | /\p{Mark} \p{M} \p{Combining_Mark}/u; + | ^~~~ [11] ^~~~~~~~~~~~~~ [12] + 29 | /\p{Decimal_Number} \p{Nd} \p{digit}/u; + | ^~~~~~~~~~~~~~ [13] ^~~~~ [14] + 30 | /\p{Punctuation} \p{P} \p{punct}/u; + | ^~~~~~~~~~~ [15] ^~~~~ [16] + +Output: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex}/u; + 3 | /\P{Hex}/u; + 4 | + 5 | /\p{L}/u; + 6 | /\p{L}/u; + 7 | /\p{gc=L}/u; + 8 | 
/\p{gc=L}/u; + 9 | /\p{General_Category=L}/u; + 10 | /\p{General_Category=L}/u; + 11 | + 12 | /\p{sc=Grek}/u; + 13 | /\p{sc=Grek}/u; + 14 | /\p{Script=Grek}/u; + 15 | /\p{Script=Grek}/u; + 16 | + 17 | /\p{scx=Grek}/u; + 18 | /\p{scx=Grek}/u; + 19 | /\p{Script_Extensions=Grek}/u; + 20 | /\p{Script_Extensions=Grek}/u; + 21 | + 22 | // Binary Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Cc} \p{Cc} \p{Cc}/u; + 28 | /\p{M} \p{M} \p{M}/u; + 29 | /\p{Nd} \p{Nd} \p{Nd}/u; + 30 | /\p{P} \p{P} \p{P}/u; + +[1] Excepted short binary property. Use 'Hex' instead. +[2] Excepted short General_Category property. Use 'L' instead. +[3] Excepted short General_Category property. Use 'L' instead. +[4] Excepted short General_Category property. Use 'L' instead. +[5] Excepted short Script property. Use 'Grek' instead. +[6] Excepted short Script property. Use 'Grek' instead. +[7] Excepted short Script property. Use 'Grek' instead. +[8] Excepted short Script property. Use 'Grek' instead. +[9] Excepted short General_Category property. Use 'Cc' instead. +[10] Excepted short General_Category property. Use 'Cc' instead. +[11] Excepted short General_Category property. Use 'M' instead. +[12] Excepted short General_Category property. Use 'M' instead. +[13] Excepted short General_Category property. Use 'Nd' instead. +[14] Excepted short General_Category property. Use 'Nd' instead. +[15] Excepted short General_Category property. Use 'P' instead. +[16] Excepted short General_Category property. Use 'P' instead. 
+--- + + +Test: unicode-property >> invalid >>> "/\\P{ASCII}/u;\n/\\P{Hex_Digit}/u;\n/\\P{Hex}/u;\n\n/\\p{L}/u;\n/\\p{Letter}/u;\n/\\p{gc=L}/u;\n/\\p{gc=Letter}/u;\n/\\p{General_Category=L}/u;\n/\\p{General_Category=Letter}/u;\n\n/\\p{sc=Grek}/u;\n/\\p{sc=Greek}/u;\n/\\p{Script=Grek}/u;\n/\\p{Script=Greek}/u;\n\n/\\p{scx=Grek}/u;\n/\\p{scx=Greek}/u;\n/\\p{Script_Extensions=Grek}/u;\n/\\p{Script_Extensions=Greek}/u;\n\n// Binary Properties\n// https://github.com/tc39/ecma262/issues/3286\n// /\\p{White_Space} \\p{space} \\p{WSpace}/u;\n\n// General_Category\n/\\p{Control} \\p{Cc} \\p{cntrl}/u;\n/\\p{Mark} \\p{M} \\p{Combining_Mark}/u;\n/\\p{Decimal_Number} \\p{Nd} \\p{digit}/u;\n/\\p{Punctuation} \\p{P} \\p{punct}/u; 3" +Options: + - generalCategory: ignore + key: ignore + property: + binary: short + generalCategory: long + script: ignore + +Code: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex_Digit}/u; + | ^~~~~~~~~ [1] + 3 | /\P{Hex}/u; + 4 | + 5 | /\p{L}/u; + | ^ [2] + 6 | /\p{Letter}/u; + 7 | /\p{gc=L}/u; + | ^ [3] + 8 | /\p{gc=Letter}/u; + 9 | /\p{General_Category=L}/u; + | ^ [4] + 10 | /\p{General_Category=Letter}/u; + 11 | + 12 | /\p{sc=Grek}/u; + 13 | /\p{sc=Greek}/u; + 14 | /\p{Script=Grek}/u; + 15 | /\p{Script=Greek}/u; + 16 | + 17 | /\p{scx=Grek}/u; + 18 | /\p{scx=Greek}/u; + 19 | /\p{Script_Extensions=Grek}/u; + 20 | /\p{Script_Extensions=Greek}/u; + 21 | + 22 | // Binary Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Control} \p{Cc} \p{cntrl}/u; + | ^~ [5] ^~~~~ [6] + 28 | /\p{Mark} \p{M} \p{Combining_Mark}/u; + | ^ [7] ^~~~~~~~~~~~~~ [8] + 29 | /\p{Decimal_Number} \p{Nd} \p{digit}/u; + | ^~ [9] ^~~~~ [10] + 30 | /\p{Punctuation} \p{P} \p{punct}/u; + | ^ ^~~~~ + | [11] [12] + +Output: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex}/u; + 3 | /\P{Hex}/u; + 4 | + 5 | /\p{Letter}/u; + 6 | /\p{Letter}/u; + 7 | /\p{gc=Letter}/u; + 8 | /\p{gc=Letter}/u; + 9 | 
/\p{General_Category=Letter}/u; + 10 | /\p{General_Category=Letter}/u; + 11 | + 12 | /\p{sc=Grek}/u; + 13 | /\p{sc=Greek}/u; + 14 | /\p{Script=Grek}/u; + 15 | /\p{Script=Greek}/u; + 16 | + 17 | /\p{scx=Grek}/u; + 18 | /\p{scx=Greek}/u; + 19 | /\p{Script_Extensions=Grek}/u; + 20 | /\p{Script_Extensions=Greek}/u; + 21 | + 22 | // Binary Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Control} \p{Control} \p{Control}/u; + 28 | /\p{Mark} \p{Mark} \p{Mark}/u; + 29 | /\p{Decimal_Number} \p{Decimal_Number} \p{Decimal_Number}/u; + 30 | /\p{Punctuation} \p{Punctuation} \p{Punctuation}/u; + +[1] Excepted short binary property. Use 'Hex' instead. +[2] Excepted long General_Category property. Use 'Letter' instead. +[3] Excepted long General_Category property. Use 'Letter' instead. +[4] Excepted long General_Category property. Use 'Letter' instead. +[5] Excepted long General_Category property. Use 'Control' instead. +[6] Excepted long General_Category property. Use 'Control' instead. +[7] Excepted long General_Category property. Use 'Mark' instead. +[8] Excepted long General_Category property. Use 'Mark' instead. +[9] Excepted long General_Category property. Use 'Decimal_Number' instead. +[10] Excepted long General_Category property. Use 'Decimal_Number' instead. +[11] Excepted long General_Category property. Use 'Punctuation' instead. +[12] Excepted long General_Category property. Use 'Punctuation' instead. 
+--- + + +Test: unicode-property >> invalid >>> "/\\P{ASCII}/u;\n/\\P{Hex_Digit}/u;\n/\\P{Hex}/u;\n\n/\\p{L}/u;\n/\\p{Letter}/u;\n/\\p{gc=L}/u;\n/\\p{gc=Letter}/u;\n/\\p{General_Category=L}/u;\n/\\p{General_Category=Letter}/u;\n\n/\\p{sc=Grek}/u;\n/\\p{sc=Greek}/u;\n/\\p{Script=Grek}/u;\n/\\p{Script=Greek}/u;\n\n/\\p{scx=Grek}/u;\n/\\p{scx=Greek}/u;\n/\\p{Script_Extensions=Grek}/u;\n/\\p{Script_Extensions=Greek}/u;\n\n// Binary Properties\n// https://github.com/tc39/ecma262/issues/3286\n// /\\p{White_Space} \\p{space} \\p{WSpace}/u;\n\n// General_Category\n/\\p{Control} \\p{Cc} \\p{cntrl}/u;\n/\\p{Mark} \\p{M} \\p{Combining_Mark}/u;\n/\\p{Decimal_Number} \\p{Nd} \\p{digit}/u;\n/\\p{Punctuation} \\p{P} \\p{punct}/u; 4" +Options: + - generalCategory: ignore + key: ignore + property: + binary: long + generalCategory: ignore + script: short + +Code: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex_Digit}/u; + 3 | /\P{Hex}/u; + | ^~~ [1] + 4 | + 5 | /\p{L}/u; + 6 | /\p{Letter}/u; + 7 | /\p{gc=L}/u; + 8 | /\p{gc=Letter}/u; + 9 | /\p{General_Category=L}/u; + 10 | /\p{General_Category=Letter}/u; + 11 | + 12 | /\p{sc=Grek}/u; + 13 | /\p{sc=Greek}/u; + | ^~~~~ [2] + 14 | /\p{Script=Grek}/u; + 15 | /\p{Script=Greek}/u; + | ^~~~~ [3] + 16 | + 17 | /\p{scx=Grek}/u; + 18 | /\p{scx=Greek}/u; + | ^~~~~ [4] + 19 | /\p{Script_Extensions=Grek}/u; + 20 | /\p{Script_Extensions=Greek}/u; + | ^~~~~ [5] + 21 | + 22 | // Binary Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Control} \p{Cc} \p{cntrl}/u; + 28 | /\p{Mark} \p{M} \p{Combining_Mark}/u; + 29 | /\p{Decimal_Number} \p{Nd} \p{digit}/u; + 30 | /\p{Punctuation} \p{P} \p{punct}/u; + +Output: + 1 | /\P{ASCII}/u; + 2 | /\P{Hex_Digit}/u; + 3 | /\P{Hex_Digit}/u; + 4 | + 5 | /\p{L}/u; + 6 | /\p{Letter}/u; + 7 | /\p{gc=L}/u; + 8 | /\p{gc=Letter}/u; + 9 | /\p{General_Category=L}/u; + 10 | /\p{General_Category=Letter}/u; + 11 | + 12 | 
/\p{sc=Grek}/u; + 13 | /\p{sc=Grek}/u; + 14 | /\p{Script=Grek}/u; + 15 | /\p{Script=Grek}/u; + 16 | + 17 | /\p{scx=Grek}/u; + 18 | /\p{scx=Grek}/u; + 19 | /\p{Script_Extensions=Grek}/u; + 20 | /\p{Script_Extensions=Grek}/u; + 21 | + 22 | // Binary Properties + 23 | // https://github.com/tc39/ecma262/issues/3286 + 24 | // /\p{White_Space} \p{space} \p{WSpace}/u; + 25 | + 26 | // General_Category + 27 | /\p{Control} \p{Cc} \p{cntrl}/u; + 28 | /\p{Mark} \p{M} \p{Combining_Mark}/u; + 29 | /\p{Decimal_Number} \p{Nd} \p{digit}/u; + 30 | /\p{Punctuation} \p{P} \p{punct}/u; + +[1] Excepted long binary property. Use 'Hex_Digit' instead. +[2] Excepted short Script property. Use 'Grek' instead. +[3] Excepted short Script property. Use 'Grek' instead. +[4] Excepted short Script property. Use 'Grek' instead. +[5] Excepted short Script property. Use 'Grek' instead. +--- diff --git a/tests/lib/rules/unicode-property.ts b/tests/lib/rules/unicode-property.ts new file mode 100644 index 000000000..9223a6068 --- /dev/null +++ b/tests/lib/rules/unicode-property.ts @@ -0,0 +1,144 @@ +import { SnapshotRuleTester } from "eslint-snapshot-rule-tester" +import rule from "../../../lib/rules/unicode-property" + +const tester = new SnapshotRuleTester({ + languageOptions: { + ecmaVersion: "latest", + sourceType: "module", + }, +}) + +const gc = String.raw` +/\p{L}/u; +/\p{Letter}/u; +/\p{gc=L}/u; +/\p{gc=Letter}/u; +/\p{General_Category=L}/u; +/\p{General_Category=Letter}/u; +`.trim() + +const keyValue = String.raw` +${gc} + +/\p{sc=Grek}/u; +/\p{sc=Greek}/u; +/\p{Script=Grek}/u; +/\p{Script=Greek}/u; + +/\p{scx=Grek}/u; +/\p{scx=Greek}/u; +/\p{Script_Extensions=Grek}/u; +/\p{Script_Extensions=Greek}/u; +`.trim() + +// annoyingly, some have 2 aliases +const interesting = String.raw` +// Binary Properties +// https://github.com/tc39/ecma262/issues/3286 +// /\p{White_Space} \p{space} \p{WSpace}/u; + +// General_Category +/\p{Control} \p{Cc} \p{cntrl}/u; +/\p{Mark} \p{M} \p{Combining_Mark}/u; 
+/\p{Decimal_Number} \p{Nd} \p{digit}/u; +/\p{Punctuation} \p{P} \p{punct}/u; +`.trim() + +const allForms = String.raw` +/\P{ASCII}/u; +/\P{Hex_Digit}/u; +/\P{Hex}/u; + +${keyValue} + +${interesting} +`.trim() + +tester.run("unicode-property", rule as any, { + valid: [ + { + code: allForms, + options: [ + { + generalCategory: "ignore", + key: "ignore", + property: "ignore", + }, + ], + }, + ], + invalid: [ + { + name: "test default configuration", + code: allForms, + }, + { + code: gc, + options: [ + { + generalCategory: "always", + key: "ignore", + property: "ignore", + }, + ], + }, + { + code: gc, + options: [ + { generalCategory: "never", key: "ignore", property: "ignore" }, + ], + }, + { + code: keyValue, + options: [ + { generalCategory: "ignore", key: "long", property: "ignore" }, + ], + }, + { + code: keyValue, + options: [ + { generalCategory: "ignore", key: "short", property: "ignore" }, + ], + }, + { + code: allForms, + options: [ + { generalCategory: "ignore", key: "ignore", property: "long" }, + ], + }, + { + code: allForms, + options: [ + { generalCategory: "ignore", key: "ignore", property: "short" }, + ], + }, + { + code: allForms, + options: [ + { + generalCategory: "ignore", + key: "ignore", + property: { + binary: "short", + generalCategory: "long", + script: "ignore", + }, + }, + ], + }, + { + code: allForms, + options: [ + { + generalCategory: "ignore", + key: "ignore", + property: { + binary: "long", + generalCategory: "ignore", + script: "short", + }, + }, + ], + }, + ], +}) From 0796a090715e967c466def0a3b1300ae05483927 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Sat, 6 Apr 2024 19:39:05 +0200 Subject: [PATCH 2/3] Create nervous-lies-yawn.md --- .changeset/nervous-lies-yawn.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/nervous-lies-yawn.md diff --git a/.changeset/nervous-lies-yawn.md b/.changeset/nervous-lies-yawn.md new file mode 100644 index 000000000..39395260b --- /dev/null +++ 
b/.changeset/nervous-lies-yawn.md @@ -0,0 +1,5 @@ +--- +"eslint-plugin-regexp": minor +--- + +Add `regexp/unicode-property` rule to enforce consistent naming of unicode properties From 2af62fcfa0161b1b4512a1ab5a60fe6043a46547 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Sat, 6 Apr 2024 20:24:16 +0200 Subject: [PATCH 3/3] Document exceptions to short and long names --- docs/rules/unicode-property.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/rules/unicode-property.md b/docs/rules/unicode-property.md index 93391aab0..5f23b8b07 100644 --- a/docs/rules/unicode-property.md +++ b/docs/rules/unicode-property.md @@ -174,6 +174,10 @@ This option controls whether the short or long form is required. Which forms is If the option is set to a string instead of an object, it will be used for all property types. +> NOTE: The `"short"` and `"long"` options follow the [Unicode standard](https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt) for short and long names. However, short names aren't always shorter than long names. E.g. the short name for `\p{sc=Han}` is `\p{sc=Hani}`. +> +> There are also some properties that don't have a short name, such as `\p{sc=Thai}`, and some that have additional aliases that can be longer than the long name, such as `\p{Mark}` (long) with its short name `\p{M}` and alias `\p{Combining_Mark}`. + #### Examples All set to `"long"`: