From 2b7e136feb9595a76d97b0307e86c287a0bff401 Mon Sep 17 00:00:00 2001 From: Jason Dent Date: Tue, 19 Oct 2021 20:18:48 +0200 Subject: [PATCH] dev: Improve grammar matching. (#1902) * dev: Improve grammar matching. * fix testing issue. --- cspell-dict.txt | 1 + .../fixtures/tokenizer-tests/test.md | 164 ++++ packages/cspell-grammar/samples/sampleJest.ts | 27 + packages/cspell-grammar/src/grammars/index.ts | 2 + .../cspell-grammar/src/grammars/typescript.ts | 78 +- packages/cspell-grammar/src/index.ts | 3 +- .../__snapshots__/tokenizeLine.test.ts.snap | 316 +++++++ .../src/parser/grammarNormalizer.test.ts | 12 +- .../src/parser/grammarNormalizer.ts | 4 +- .../src/parser/grammarTypesHelpers.ts | 2 +- packages/cspell-grammar/src/parser/index.ts | 3 + .../src/parser/processors/procMatchingRule.ts | 2 + .../src/parser/tokenizeLine.test.ts | 44 + .../cspell-grammar/src/parser/tokenizeLine.ts | 2 +- .../__snapshots__/visualizeAsMD.test.ts.snap | 851 ++++++++++++++++++ .../src/viewer/escapeMarkdown.test.ts | 21 + .../src/viewer/escapeMarkdown.ts | 50 + .../src/viewer/markdownHelper.ts | 5 + .../src/viewer/visualizeAsMD.test.ts | 54 ++ .../src/viewer/visualizeAsMD.ts | 43 + 20 files changed, 1658 insertions(+), 26 deletions(-) create mode 100644 packages/cspell-grammar/samples/sampleJest.ts create mode 100644 packages/cspell-grammar/src/grammars/index.ts create mode 100644 packages/cspell-grammar/src/parser/index.ts create mode 100644 packages/cspell-grammar/src/viewer/__snapshots__/visualizeAsMD.test.ts.snap create mode 100644 packages/cspell-grammar/src/viewer/escapeMarkdown.test.ts create mode 100644 packages/cspell-grammar/src/viewer/escapeMarkdown.ts create mode 100644 packages/cspell-grammar/src/viewer/markdownHelper.ts create mode 100644 packages/cspell-grammar/src/viewer/visualizeAsMD.test.ts create mode 100644 packages/cspell-grammar/src/viewer/visualizeAsMD.ts diff --git a/cspell-dict.txt b/cspell-dict.txt index cd42bf6b346..7bcc33afa0d 100644 --- 
a/cspell-dict.txt +++ b/cspell-dict.txt @@ -14,6 +14,7 @@ dependabot deserialize deserializer deserializers +DocBlock exonum gimu globstar diff --git a/packages/cspell-grammar/fixtures/tokenizer-tests/test.md b/packages/cspell-grammar/fixtures/tokenizer-tests/test.md index 1a0d677b13f..c20714bd8cb 100644 --- a/packages/cspell-grammar/fixtures/tokenizer-tests/test.md +++ b/packages/cspell-grammar/fixtures/tokenizer-tests/test.md @@ -20,4 +20,168 @@ | `")"` | | | `")"` | | +- `0`: const greeting = "hello";↩ + + | text | scope | + | ------------------------------- | --------------------------------- | + | const greeting = | source.ts | + | " | string.quoted.double.ts source.ts | + | hello | string.quoted.double.ts source.ts | + | " | string.quoted.double.ts source.ts | + | ;↩ | source.ts | + +# Sample TypeScript file + +- `0`: import { tokenizedLinesToMarkdown } from './visualizeAsMD';↩ + + | text | scope | + | ------------------------------------------------------ | --------------------------------- | + | import { tokenizedLinesToMarkdown } from | source.ts | + | ' | string.quoted.single.ts source.ts | + | ./visualizeAsMD | string.quoted.single.ts source.ts | + | ' | string.quoted.single.ts source.ts | + | ;↩ | source.ts | + +- `1`: import { TypeScript } from '../grammars';↩ + + | text | scope | + | ---------------------------------------- | --------------------------------- | + | import { TypeScript } from | source.ts | + | ' | string.quoted.single.ts source.ts | + | ../grammars | string.quoted.single.ts source.ts | + | ' | string.quoted.single.ts source.ts | + | ;↩ | source.ts | + +- `2`: import { normalizeGrammar } from '../parser/grammarNormalizer';↩ + + | text | scope | + | ---------------------------------------------- | --------------------------------- | + | import { normalizeGrammar } from | source.ts | + | ' | string.quoted.single.ts source.ts | + | ../parser/grammarNormalizer | string.quoted.single.ts source.ts | + | ' | string.quoted.single.ts source.ts | 
+ | ;↩ | source.ts | + +- `3`: import { tokenizeText } from '../dist';↩ + + | text | scope | + | ------------------------------------------ | --------------------------------- | + | import { tokenizeText } from | source.ts | + | ' | string.quoted.single.ts source.ts | + | ../dist | string.quoted.single.ts source.ts | + | ' | string.quoted.single.ts source.ts | + | ;↩ | source.ts | + +- `4`: + + | text | scope | + | -------------- | --------- | + | | source.ts | + +- `5`: describe('visualizeAsMD', () => {↩ + + | text | scope | + | ---------------------------------- | --------------------------------- | + | describe( | source.ts | + | ' | string.quoted.single.ts source.ts | + | visualizeAsMD | string.quoted.single.ts source.ts | + | ' | string.quoted.single.ts source.ts | + | , () => {↩ | source.ts | + +- `6`: const gTypeScript = normalizeGrammar(TypeScript.grammar);↩ + + | text | scope | + | -------------------------------------------------------------------------------- | --------- | + | const gTypeScript = normalizeGrammar(TypeScript.grammar);↩ | source.ts | + +- `7`: + + | text | scope | + | -------------- | --------- | + | | source.ts | + +- `8`: test.each`↩ + + | text | scope | + | ----------------------- | ---------------------------- | + | test.each | source.ts | + | ` | string.template.ts source.ts | + | | string.template.ts source.ts | + +- `9`: lines↩ + + | text | scope | + | -------------------- | ---------------------------- | + | lines↩ | string.template.ts source.ts | + +- `10`: \${tokenize('')}↩ + + | text | scope | + | ----------------------------------------------- | ---------------------------- | + | \${tokenize('')}↩ | string.template.ts source.ts | + +- `11`: \${tokenize('\\tconst greeting = "hello";\n')}↩ + + | text | scope | + | ------------------------------------------------ | --------------------------------------------------------- | + | \${tokenize(' | string.template.ts source.ts | + | \t | constant.character.escape.ts string.template.ts 
source.ts | + | const greeting = "hello"; | string.template.ts source.ts | + | \n | constant.character.escape.ts string.template.ts source.ts | + | ')}↩ | string.template.ts source.ts | + +- `12`: `('tokenizedLinesToMarkdown', ({ lines }) => {↩ + + | text | scope | + | ------------------------------------------- | --------------------------------- | + | | string.template.ts source.ts | + | ` | string.template.ts source.ts | + | ( | source.ts | + | ' | string.quoted.single.ts source.ts | + | tokenizedLinesToMarkdown | string.quoted.single.ts source.ts | + | ' | string.quoted.single.ts source.ts | + | , ({ lines }) => {↩ | source.ts | + +- `13`: expect(tokenizedLinesToMarkdown(lines)).toMatchSnapshot();↩ + + | text | scope | + | ------------------------------------------------------------------------------------------------- | --------- | + | expect(tokenizedLinesToMarkdown(lines)).toMatchSnapshot();↩ | source.ts | + +- `14`: });↩ + + | text | scope | + | ---------------------- | --------- | + | });↩ | source.ts | + +- `15`: + + | text | scope | + | -------------- | --------- | + | | source.ts | + +- `16`: function tokenize(text: string) {↩ + + | text | scope | + | -------------------------------------------------------- | --------- | + | function tokenize(text: string) {↩ | source.ts | + +- `17`: return tokenizeText(text, gTypeScript);↩ + + | text | scope | + | -------------------------------------------------------------- | --------- | + | return tokenizeText(text, gTypeScript);↩ | source.ts | + +- `18`: }↩ + + | text | scope | + | ---------------- | --------- | + | }↩ | source.ts | + +- `19`: });↩ + + | text | scope | + | --------------------- | --------- | + | });↩ | source.ts | + diff --git a/packages/cspell-grammar/samples/sampleJest.ts b/packages/cspell-grammar/samples/sampleJest.ts new file mode 100644 index 00000000000..6db9c72af0e --- /dev/null +++ b/packages/cspell-grammar/samples/sampleJest.ts @@ -0,0 +1,27 @@ +import { tokenizedLinesToMarkdown } from 
'./visualizeAsMD'; +import { TypeScript } from '../grammars'; +import { normalizeGrammar } from '../parser/grammarNormalizer'; +import { tokenizeText } from '../dist'; + +const sampleText = ` + ${ + '.'.repeat(22) + // Comment + { name: 'First' }.name + } +`; + +describe('visualizeAsMD', () => { + const gTypeScript = normalizeGrammar(TypeScript.grammar); + + test.each` + lines + ${tokenize('')} + ${tokenize('\tconst greeting = "hello";\n')} + `('tokenizedLinesToMarkdown', ({ lines }) => { + expect(tokenizedLinesToMarkdown(lines)).toMatchSnapshot(); + }); + + function tokenize(text: string) { + return tokenizeText(text, gTypeScript); + } +}); diff --git a/packages/cspell-grammar/src/grammars/index.ts b/packages/cspell-grammar/src/grammars/index.ts new file mode 100644 index 00000000000..c7caff093d6 --- /dev/null +++ b/packages/cspell-grammar/src/grammars/index.ts @@ -0,0 +1,2 @@ +export * as TypeScript from './typescript'; +export * as Markdown from './markdown'; diff --git a/packages/cspell-grammar/src/grammars/typescript.ts b/packages/cspell-grammar/src/grammars/typescript.ts index ac14cf773ba..7220b3500ea 100644 --- a/packages/cspell-grammar/src/grammars/typescript.ts +++ b/packages/cspell-grammar/src/grammars/typescript.ts @@ -2,7 +2,8 @@ import { Grammar, Repository } from '..'; const repository: Repository = { statements: { - patterns: [{ include: '#string' }, { include: '#comment' }], + name: 'code.ts', + patterns: ['#string', '#comment', '#braces'], }, string: { patterns: [{ include: '#string_q_single' }, { include: '#string_q_double' }, { include: '#string_template' }], @@ -11,38 +12,85 @@ const repository: Repository = { name: 'string.quoted.single.ts', begin: "'", end: /'|((?:[^\\\n])$)/, + captures: 'punctuation.string.ts', patterns: [{ include: '#string_character_escape' }], }, string_q_double: { name: 'string.quoted.double.ts', begin: '"', end: /"|((?:[^\\\n])$)/, + captures: 'punctuation.string.ts', patterns: [{ include: '#string_character_escape' }], 
}, string_template: { name: 'string.template.ts', begin: '`', end: '`', - patterns: [{ include: '#string_character_escape' }], - }, - string_wrap: { - match: /(?:[^\\\n])$/, + captures: 'punctuation.string.ts', + patterns: [ + { + name: 'meta.template.expression.ts', + contentName: 'meta.embedded.line.ts', + begin: '${', + end: '}', + patterns: ['#statements'], + captures: 'punctuation.definition.template.expression.ts', + }, + { include: '#string_character_escape' }, + ], }, string_character_escape: { name: 'constant.character.escape.ts', match: /\\(x[0-9A-Fa-f]{2}|[0-3][0-7]{0,2}|[4-7][0-7]?|.|$)/, }, - comment: { - patterns: [{ include: '#comment_line' }, { include: '#comment_block' }], + braces: { + patterns: [ + { + begin: '(', + end: ')', + captures: 'punctuation.meta.brace.ts', + patterns: ['#statements'], + contentName: 'meta.brace.ts', + }, + { + begin: '{', + end: '}', + captures: 'punctuation.meta.brace.ts', + patterns: ['#statements'], + contentName: 'meta.brace.ts', + }, + { + begin: '[', + end: ']', + captures: 'punctuation.meta.brace.ts', + patterns: ['#statements'], + contentName: 'meta.brace.ts', + }, + ], }, - comment_line: { - name: 'comment.line.ts', - match: /\/\/.*/, - }, - comment_block: { - name: 'comment.block.ts', - begin: '/*', - end: '*/', + comment: { + patterns: [ + { + name: 'comment.line.ts', + comment: 'line comment', + begin: '//', + end: /(?=$)/, + captures: 'punctuation.definition.comment.ts', + }, + { + name: 'comment.block.documentation.ts', + comment: 'DocBlock', + begin: /\/\*\*(?!\/)/, + captures: 'punctuation.definition.comment.ts', + end: '*/', + }, + { + name: 'comment.block.ts', + begin: '/*', + end: '*/', + captures: 'punctuation.definition.comment.ts', + }, + ], }, }; diff --git a/packages/cspell-grammar/src/index.ts b/packages/cspell-grammar/src/index.ts index c46cc950658..762edfaa833 100644 --- a/packages/cspell-grammar/src/index.ts +++ b/packages/cspell-grammar/src/index.ts @@ -1 +1,2 @@ -export type { Grammar, 
Pattern, Repository } from './parser/grammarDefinition'; +export type { Grammar, Pattern, Repository } from './parser'; +export { tokenizeLine, tokenizeText } from './parser'; diff --git a/packages/cspell-grammar/src/parser/__snapshots__/tokenizeLine.test.ts.snap b/packages/cspell-grammar/src/parser/__snapshots__/tokenizeLine.test.ts.snap index f00fcf0caf5..f1251bcc499 100644 --- a/packages/cspell-grammar/src/parser/__snapshots__/tokenizeLine.test.ts.snap +++ b/packages/cspell-grammar/src/parser/__snapshots__/tokenizeLine.test.ts.snap @@ -45,6 +45,322 @@ Array [ ] `; +exports[`tokenizeLine tokenizeText TypeScript "n = 42; // comment." - 1`] = ` +Array [ + 0: "n = 42; // comment.": + 0: "n = 42; " -- source.ts + 8: "//" -- punctuation.definition.comment.ts comment.line.ts code.ts source.ts + 10: " comment." -- comment.line.ts code.ts source.ts + 19: "" -- punctuation.definition.comment.ts comment.line.ts code.ts source.ts, +] +`; + +exports[`tokenizeLine tokenizeText TypeScript "n = 42; // comment.\\n\\n" - 1`] = ` +Array [ + 0: "n = 42; // comment.↩": + 0: "n = 42; " -- source.ts + 8: "//" -- punctuation.definition.comment.ts comment.line.ts code.ts source.ts + 10: " comment." -- comment.line.ts code.ts source.ts + 19: "" -- punctuation.definition.comment.ts comment.line.ts code.ts source.ts + 19: "↩" -- source.ts, + 1: "↩": + 0: "↩" -- source.ts, +] +`; + +exports[`tokenizeLine tokenizeText TypeScript "n = 42; // comment.\\nq = n + 1;\\n" - 1`] = ` +Array [ + 0: "n = 42; // comment.↩": + 0: "n = 42; " -- source.ts + 8: "//" -- punctuation.definition.comment.ts comment.line.ts code.ts source.ts + 10: " comment." 
-- comment.line.ts code.ts source.ts + 19: "" -- punctuation.definition.comment.ts comment.line.ts code.ts source.ts + 19: "↩" -- source.ts, + 1: "q = n + 1;↩": + 0: "q = n + 1;↩" -- source.ts, +] +`; + +exports[`tokenizeLine tokenizeText TypeScript sampleCode - 1`] = ` +Array [ + 0: "import { tokenizedLinesToMarkdown } from './visualizeAsMD';↩": + 0: "import " -- source.ts + 7: "{" -- punctuation.meta.brace.ts code.ts source.ts + 8: " tokenizedLinesToMarkdown " -- meta.brace.ts code.ts source.ts + 34: "}" -- punctuation.meta.brace.ts code.ts source.ts + 35: " from " -- source.ts + 41: "'" -- punctuation.string.ts string.quoted.single.ts code.ts source.ts + 42: "./visualizeAsMD" -- string.quoted.single.ts code.ts source.ts + 57: "'" -- punctuation.string.ts string.quoted.single.ts code.ts source.ts + 58: ";↩" -- source.ts, + 1: "import { TypeScript } from '../grammars';↩": + 0: "import " -- source.ts + 7: "{" -- punctuation.meta.brace.ts code.ts source.ts + 8: " TypeScript " -- meta.brace.ts code.ts source.ts + 20: "}" -- punctuation.meta.brace.ts code.ts source.ts + 21: " from " -- source.ts + 27: "'" -- punctuation.string.ts string.quoted.single.ts code.ts source.ts + 28: "../grammars" -- string.quoted.single.ts code.ts source.ts + 39: "'" -- punctuation.string.ts string.quoted.single.ts code.ts source.ts + 40: ";↩" -- source.ts, + 2: "import { normalizeGrammar } from '../parser/grammarNormalizer';↩": + 0: "import " -- source.ts + 7: "{" -- punctuation.meta.brace.ts code.ts source.ts + 8: " normalizeGrammar " -- meta.brace.ts code.ts source.ts + 26: "}" -- punctuation.meta.brace.ts code.ts source.ts + 27: " from " -- source.ts + 33: "'" -- punctuation.string.ts string.quoted.single.ts code.ts source.ts + 34: "../parser/grammarNormalizer" -- string.quoted.single.ts code.ts source.ts + 61: "'" -- punctuation.string.ts string.quoted.single.ts code.ts source.ts + 62: ";↩" -- source.ts, + 3: "import { tokenizeText } from '../dist';↩": + 0: "import " -- source.ts + 7: 
"{" -- punctuation.meta.brace.ts code.ts source.ts + 8: " tokenizeText " -- meta.brace.ts code.ts source.ts + 22: "}" -- punctuation.meta.brace.ts code.ts source.ts + 23: " from " -- source.ts + 29: "'" -- punctuation.string.ts string.quoted.single.ts code.ts source.ts + 30: "../dist" -- string.quoted.single.ts code.ts source.ts + 37: "'" -- punctuation.string.ts string.quoted.single.ts code.ts source.ts + 38: ";↩" -- source.ts, + 4: "↩": + 0: "↩" -- source.ts, + 5: "const sampleText = \`↩": + 0: "const sampleText = " -- source.ts + 19: "\`" -- punctuation.string.ts string.template.ts code.ts source.ts + 20: "↩" -- string.template.ts code.ts source.ts, + 6: " \${↩": + 0: " " -- string.template.ts code.ts source.ts + 4: "\${" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts + 6: "↩" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts, + 7: " '.'.repeat(22) + // Comment↩": + 0: " " -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 8: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 9: "." 
-- string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 10: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 11: ".repeat" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 18: "(" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 19: "22" -- meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 21: ")" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 22: " + " -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 25: "//" -- punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 27: " Comment" -- comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 35: "" -- punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 35: "↩" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts, + 8: " { name: 'First' }.name↩": + 0: " " -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 8: "{" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 9: " name: " -- meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 16: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 17: 
"First" -- string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 22: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 23: " " -- meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 24: "}" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 25: ".name↩" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts, + 9: " }↩": + 0: " " -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 4: "}" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts + 5: "↩" -- string.template.ts code.ts source.ts, + 10: "\`;↩": + 0: "\`" -- punctuation.string.ts string.template.ts code.ts source.ts + 1: ";↩" -- source.ts, + 11: "↩": + 0: "↩" -- source.ts, + 12: "describe('visualizeAsMD', () => {↩": + 0: "describe" -- source.ts + 8: "(" -- punctuation.meta.brace.ts code.ts source.ts + 9: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts + 10: "visualizeAsMD" -- string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts + 23: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts + 24: ", " -- meta.brace.ts code.ts source.ts + 26: "(" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 27: ")" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 28: " => " -- meta.brace.ts code.ts source.ts + 32: "{" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 33: "↩" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 13: " const gTypeScript = 
normalizeGrammar(TypeScript.grammar);↩": + 0: " const gTypeScript = normalizeGrammar" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 40: "(" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 41: "TypeScript.grammar" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 59: ")" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 60: ";↩" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 14: "↩": + 0: "↩" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 15: " test.each\`↩": + 0: " test.each" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 13: "\`" -- punctuation.string.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 14: "↩" -- string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 16: " lines↩": + 0: " lines↩" -- string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 17: " \${tokenize('')}↩": + 0: " " -- string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 8: "\${" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 10: "tokenize" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 18: "(" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 19: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 20: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts 
meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 21: ")" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 22: "}" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 23: "↩" -- string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 18: " \${tokenize('\\\\tconst greeting = \\"hello\\";\\\\n')}↩": + 0: " " -- string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 8: "\${" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 10: "tokenize" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 18: "(" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 19: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 20: "\\\\t" -- constant.character.escape.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 22: "const greeting = \\"hello\\";" -- string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 47: "\\\\n" -- constant.character.escape.ts string.quoted.single.ts code.ts meta.brace.ts code.ts 
meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 49: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 50: ")" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 51: "}" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 52: "↩" -- string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 19: " \`('tokenizedLinesToMarkdown', ({ lines }) => {↩": + 0: " " -- string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 4: "\`" -- punctuation.string.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 5: "(" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 6: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 7: "tokenizedLinesToMarkdown" -- string.quoted.single.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 31: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 32: ", " -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 34: "(" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 35: "{" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 36: " lines " -- meta.brace.ts code.ts 
meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 43: "}" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 44: ")" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 45: " => " -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 49: "{" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 50: "↩" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 20: " expect(tokenizedLinesToMarkdown(lines)).toMatchSnapshot();↩": + 0: " expect" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 14: "(" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 15: "tokenizedLinesToMarkdown" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 39: "(" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 40: "lines" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 45: ")" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 46: ")" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 47: ".toMatchSnapshot" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 63: "(" -- punctuation.meta.brace.ts code.ts 
meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 64: ")" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 65: ";↩" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 21: " });↩": + 0: " " -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 4: "}" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 5: ")" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 6: ";↩" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 22: "↩": + 0: "↩" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 23: " function tokenize(text: string) {↩": + 0: " function tokenize" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 21: "(" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 22: "text: string" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 34: ")" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 35: " " -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 36: "{" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 37: "↩" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 24: " return tokenizeText(text, gTypeScript);↩": + 0: " return tokenizeText" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 27: "(" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 28: "text, gTypeScript" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 45: ")" -- punctuation.meta.brace.ts code.ts 
meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 46: ";↩" -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 25: " }↩": + 0: " " -- meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 4: "}" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 5: "↩" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 26: "});↩": + 0: "}" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 1: ")" -- punctuation.meta.brace.ts code.ts source.ts + 2: ";↩" -- source.ts, +] +`; + +exports[`tokenizeLine tokenizeText TypeScript sampleTemplate - 1`] = ` +Array [ + 0: "msg = \`↩": + 0: "msg = " -- source.ts + 6: "\`" -- punctuation.string.ts string.template.ts code.ts source.ts + 7: "↩" -- string.template.ts code.ts source.ts, + 1: "\${↩": + 0: "\${" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts + 2: "↩" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts, + 2: "a + b // Join prefix and suffix↩": + 0: "a + b " -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 6: "//" -- punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 8: " Join prefix and suffix" -- comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 31: "" -- punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 31: "↩" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts, + 3: "}↩": + 0: "}" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts + 1: "↩" -- string.template.ts code.ts source.ts, + 4: 
"\`;": + 0: "\`" -- punctuation.string.ts string.template.ts code.ts source.ts + 1: ";" -- source.ts, +] +`; + +exports[`tokenizeLine tokenizeText TypeScript sampleTemplate2 - 1`] = ` +Array [ + 0: "↩": + 0: "↩" -- source.ts, + 1: "const sampleText = \`↩": + 0: "const sampleText = " -- source.ts + 19: "\`" -- punctuation.string.ts string.template.ts code.ts source.ts + 20: "↩" -- string.template.ts code.ts source.ts, + 2: " \${↩": + 0: " " -- string.template.ts code.ts source.ts + 4: "\${" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts + 6: "↩" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts, + 3: " '.'.repeat(22) + // Comment↩": + 0: " " -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 8: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 9: "." 
-- string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 10: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 11: ".repeat" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 18: "(" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 19: "22" -- meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 21: ")" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 22: " + " -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 25: "//" -- punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 27: " Comment" -- comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 35: "" -- punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 35: "↩" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts, + 4: " { name: 'First' }.name↩": + 0: " " -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 8: "{" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 9: " name: " -- meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 16: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 17: 
"First" -- string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 22: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 23: " " -- meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 24: "}" -- punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 25: ".name↩" -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts, + 5: " }↩": + 0: " " -- meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts + 4: "}" -- punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts + 5: "↩" -- string.template.ts code.ts source.ts, + 6: "\`;↩": + 0: "\`" -- punctuation.string.ts string.template.ts code.ts source.ts + 1: ";↩" -- source.ts, + 7: "↩": + 0: "↩" -- source.ts, + 8: "describe('visualizeAsMD', () => {↩": + 0: "describe" -- source.ts + 8: "(" -- punctuation.meta.brace.ts code.ts source.ts + 9: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts + 10: "visualizeAsMD" -- string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts + 23: "'" -- punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts + 24: ", " -- meta.brace.ts code.ts source.ts + 26: "(" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 27: ")" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 28: " => " -- meta.brace.ts code.ts source.ts + 32: "{" -- punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts + 33: "↩" -- meta.brace.ts code.ts meta.brace.ts code.ts source.ts, + 9: "↩": + 0: "↩" -- meta.brace.ts code.ts meta.brace.ts code.ts 
source.ts, +] +`; + exports[`tokenizeLine tokenizeText complex sample - 1`] = ` Array [ 0: "↩": diff --git a/packages/cspell-grammar/src/parser/grammarNormalizer.test.ts b/packages/cspell-grammar/src/parser/grammarNormalizer.test.ts index 5d1e2c8ab2b..338a79f394e 100644 --- a/packages/cspell-grammar/src/parser/grammarNormalizer.test.ts +++ b/packages/cspell-grammar/src/parser/grammarNormalizer.test.ts @@ -10,12 +10,12 @@ describe('grammarNormalizer', () => { }); test.each` - line | offset | expectedScope | expectedMatch - ${''} | ${0} | ${undefined} | ${undefined} - ${"import * as p from 'path';\n"} | ${0} | ${['string.quoted.single.ts', 'source.ts']} | ${oc({ index: 19, matches: ["'"] })} - ${`x = "a's" + 'b'; // comment\n`} | ${0} | ${['string.quoted.double.ts', 'source.ts']} | ${oc({ index: 4, matches: ['"'] })} - ${`x = "a's" + 'b'; // comment\n`} | ${9} | ${['string.quoted.single.ts', 'source.ts']} | ${oc({ index: 12, matches: ["'"] })} - ${`x = "a's" + 'b'; // comment\n`} | ${15} | ${['comment.line.ts', 'source.ts']} | ${oc({ index: 17, match: '// comment' })} + line | offset | expectedScope | expectedMatch + ${''} | ${0} | ${undefined} | ${undefined} + ${"import * as p from 'path';\n"} | ${0} | ${['string.quoted.single.ts', 'code.ts', 'source.ts']} | ${oc({ index: 19, matches: ["'"] })} + ${`x = "a's" + 'b'; // comment\n`} | ${0} | ${['string.quoted.double.ts', 'code.ts', 'source.ts']} | ${oc({ index: 4, matches: ['"'] })} + ${`x = "a's" + 'b'; // comment\n`} | ${9} | ${['string.quoted.single.ts', 'code.ts', 'source.ts']} | ${oc({ index: 12, matches: ["'"] })} + ${`x = "a's" + 'b'; // comment\n`} | ${15} | ${['comment.line.ts', 'code.ts', 'source.ts']} | ${oc({ index: 17, match: '//' })} `('normalizeGrammar.exec', ({ line, offset, expectedScope, expectedMatch }) => { const grammar = normalizeGrammar(grammarTS); const lineOff: LineOffsetAnchored = { text: line, offset, lineNumber: 5, anchor: -1 }; diff --git 
a/packages/cspell-grammar/src/parser/grammarNormalizer.ts b/packages/cspell-grammar/src/parser/grammarNormalizer.ts index 77a54866c60..f87371a6e17 100644 --- a/packages/cspell-grammar/src/parser/grammarNormalizer.ts +++ b/packages/cspell-grammar/src/parser/grammarNormalizer.ts @@ -127,7 +127,7 @@ function normalizePatternBeginEnd(p: PatternBeginEnd): NPatternBeginEnd { }; const testBegin = makeTestMatchFn(p.begin); - const testEnd = p.end ? makeTestMatchFn(p.end) : () => undefined; + const testEnd = p.end !== undefined ? makeTestMatchFn(p.end) : () => undefined; function bind(parentRule: Rule): Rule { const rule: Rule = { @@ -373,7 +373,7 @@ function matchString(s: string): (line: LineOffsetAnchored) => MatchResult | und function matchRegExp(r: RegExp): (line: LineOffsetAnchored) => MatchResult | undefined { return (line) => { - const rg = RegExp(r, 'g'); + const rg = RegExp(r, 'gm'); rg.lastIndex = line.offset; const m = rg.exec(line.text); return (m && createMatchResult(m)) ?? undefined; diff --git a/packages/cspell-grammar/src/parser/grammarTypesHelpers.ts b/packages/cspell-grammar/src/parser/grammarTypesHelpers.ts index fb249369d71..75384f4587b 100644 --- a/packages/cspell-grammar/src/parser/grammarTypesHelpers.ts +++ b/packages/cspell-grammar/src/parser/grammarTypesHelpers.ts @@ -19,7 +19,7 @@ export function isPatternMatch(pattern: Pattern): pattern is PatternMatch { export function isPatternBeginEnd(pattern: Pattern): pattern is PatternBeginEnd { const p = pattern; - return !!p.begin && typeof p.begin in TypeofBegin && typeof p.end in TypeofEnd; + return p.begin !== undefined && typeof p.begin in TypeofBegin && typeof p.end in TypeofEnd; } export function isPatternPatterns(p: Pattern): p is PatternPatterns { diff --git a/packages/cspell-grammar/src/parser/index.ts b/packages/cspell-grammar/src/parser/index.ts new file mode 100644 index 00000000000..e4e2d5e157a --- /dev/null +++ b/packages/cspell-grammar/src/parser/index.ts @@ -0,0 +1,3 @@ +export type { 
Grammar, Pattern, Repository } from './grammarDefinition'; +export type { Line, LineOffsetAnchored, TokenizedLine, TokenizedLineResult, TokenizedText } from './types'; +export { tokenizeLine, tokenizeText } from './tokenizeLine'; diff --git a/packages/cspell-grammar/src/parser/processors/procMatchingRule.ts b/packages/cspell-grammar/src/parser/processors/procMatchingRule.ts index 4b29bdd0b83..46512ef9637 100644 --- a/packages/cspell-grammar/src/parser/processors/procMatchingRule.ts +++ b/packages/cspell-grammar/src/parser/processors/procMatchingRule.ts @@ -43,6 +43,8 @@ export function applyCaptures(rule: Rule, match: MatchResult, captures: NCapture const scope = extractScope(rule, false); const text = match.match; const input = match.input; + // Do not emit empty captures. + if (!text && !captures) return []; if (!captures) { const tokenized: TokenizedText = { diff --git a/packages/cspell-grammar/src/parser/tokenizeLine.test.ts b/packages/cspell-grammar/src/parser/tokenizeLine.test.ts index 2b412925dbe..b934a70a611 100644 --- a/packages/cspell-grammar/src/parser/tokenizeLine.test.ts +++ b/packages/cspell-grammar/src/parser/tokenizeLine.test.ts @@ -1,5 +1,8 @@ import assert from 'assert'; import * as Simple from '../grammars/simple'; +import { TypeScript } from '../grammars'; +import { readFileSync } from 'fs'; +import * as path from 'path'; import { normalizeGrammar } from './grammarNormalizer'; import { tokenizeText } from './tokenizeLine'; import type { TokenizedLine } from './types'; @@ -7,6 +10,9 @@ import type { TokenizedLine } from './types'; // const oc = expect.objectContaining; const grammar = normalizeGrammar(Simple.grammar); +const grammarTypeScript = normalizeGrammar(TypeScript.grammar); + +const sampleCode = readFileSync(path.join(__dirname, '../../samples/sampleJest.ts'), 'utf8'); describe('tokenizeLine', () => { interface TextAndName { @@ -54,6 +60,44 @@ describe('tokenizeLine', () => { assertParsedLinesAreValid(r); expect(r).toMatchSnapshot(); }); 
+ + const sampleTemplate = '\ +msg = `\n\ +${\n\ +a + b // Join prefix and suffix\n\ +}\n\ +`;\ +'; + + const sampleTemplate2 = ` +const sampleText = \` + \${ + '.'.repeat(22) + // Comment + { name: 'First' }.name + } +\`; + +describe('visualizeAsMD', () => { + +`; + + test.each` + test | comment + ${t('n = 42; // comment.')} | ${''} + ${t('n = 42; // comment.\n\n')} | ${''} + ${t('n = 42; // comment.\nq = n + 1;\n')} | ${''} + ${t(sampleTemplate, 'sampleTemplate')} | ${''} + ${t(sampleTemplate2, 'sampleTemplate2')} | ${''} + ${t(sampleCode, 'sampleCode')} | ${''} + `('tokenizeText TypeScript $test.name - $comment', ({ test }: { test: TextAndName }) => { + expect.addSnapshotSerializer({ + test: isTokenizedLine, + serialize: serializeTokenizedLine, + }); + const r = tokenizeText(test.text, grammarTypeScript); + assertParsedLinesAreValid(r); + expect(r).toMatchSnapshot(); + }); }); // function ocScope(...scope: string[]) { diff --git a/packages/cspell-grammar/src/parser/tokenizeLine.ts b/packages/cspell-grammar/src/parser/tokenizeLine.ts index 9f7c559e0ab..6ef4b48ecc0 100644 --- a/packages/cspell-grammar/src/parser/tokenizeLine.ts +++ b/packages/cspell-grammar/src/parser/tokenizeLine.ts @@ -14,7 +14,7 @@ export function tokenizeLine(line: Line, rule: Rule): TokenizedLineResult { // Note: the begin rule has already been matched and applied. // Does it need to end or go deeper? 
- while (ctx.line.offset < lineLen) { + while (ctx.line.offset <= lineLen) { // We are at an end let endMatch = ctx.rule.end?.(ctx.line); while (endMatch?.index === ctx.line.offset) { diff --git a/packages/cspell-grammar/src/viewer/__snapshots__/visualizeAsMD.test.ts.snap b/packages/cspell-grammar/src/viewer/__snapshots__/visualizeAsMD.test.ts.snap new file mode 100644 index 00000000000..770ebea1d22 --- /dev/null +++ b/packages/cspell-grammar/src/viewer/__snapshots__/visualizeAsMD.test.ts.snap @@ -0,0 +1,851 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`visualizeAsMD tokenizedLinesToMarkdown 1`] = ` +"
+1: + +| text | scope | +| --------- | -------------------------------------------------------- | + + +
+ + +" +`; + +exports[`visualizeAsMD tokenizedLinesToMarkdown 2`] = ` +"
+1: const greeting = "hello";↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| const greeting = | source.ts | +| " | punctuation.string.ts string.quoted.double.ts code.ts source.ts | +| hello | string.quoted.double.ts code.ts source.ts | +| " | punctuation.string.ts string.quoted.double.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +" +`; + +exports[`visualizeAsMD tokenizedLinesToMarkdown 3`] = ` +"
+1: import { tokenizedLinesToMarkdown } from './visualizeAsMD';↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| import | source.ts | +| { | punctuation.meta.brace.ts code.ts source.ts | +| tokenizedLinesToMarkdown | meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts source.ts | +| from | source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ./visualizeAsMD | string.quoted.single.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+2: import { TypeScript } from '../grammars';↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| import | source.ts | +| { | punctuation.meta.brace.ts code.ts source.ts | +| TypeScript | meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts source.ts | +| from | source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ../grammars | string.quoted.single.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+3: import { normalizeGrammar } from '../parser/grammarNormalizer';↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| import | source.ts | +| { | punctuation.meta.brace.ts code.ts source.ts | +| normalizeGrammar | meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts source.ts | +| from | source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ../parser/grammarNormalizer | string.quoted.single.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+4: import { tokenizeText } from '../dist';↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| import | source.ts | +| { | punctuation.meta.brace.ts code.ts source.ts | +| tokenizeText | meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts source.ts | +| from | source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ../dist | string.quoted.single.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+5: + +| text | scope | +| --------- | -------------------------------------------------------- | +| | source.ts | + +
+ + +
+6: const sampleText = `↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| const sampleText = | source.ts | +| ` | punctuation.string.ts string.template.ts code.ts source.ts | +| | string.template.ts code.ts source.ts | + +
+ + +
+7: \${↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | string.template.ts code.ts source.ts | +| \${ | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | + +
+ + +
+8: '.'.repeat(22) + // Comment↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| . | string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| .repeat | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| 22 | meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| + | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| // | punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| Comment | comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | + +
+ + +
+9: { name: 'First' }.name↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| name: | meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| First | string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| .name↩ | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | + +
+ + +
+10: }↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| } | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | string.template.ts code.ts source.ts | + +
+ + +
+11: `;↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| ` | punctuation.string.ts string.template.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+12: + +| text | scope | +| --------- | -------------------------------------------------------- | +| | source.ts | + +
+ + +
+13: describe('visualizeAsMD', () => {↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| describe | source.ts | +| ( | punctuation.meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts | +| visualizeAsMD | string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts | +| , | meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| => | meta.brace.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+14: const gTypeScript = normalizeGrammar(TypeScript.grammar);↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| const gTypeScript = normalizeGrammar | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| TypeScript.grammar | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ;↩ | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+15: + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+16: test.each`↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| test.each | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ` | punctuation.string.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+17: lines↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| lines↩ | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+18: \${tokenize('')}↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| \${ | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| tokenize | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+19: \${tokenize('\tconst greeting = "hello";\n')}↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| \${ | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| tokenize | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| \t | constant.character.escape.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| const greeting = "hello"; | string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| \n | constant.character.escape.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | 
punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+20: `('tokenizedLinesToMarkdown', ({ lines }) => {↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ` | punctuation.string.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| tokenizedLinesToMarkdown | string.quoted.single.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| , | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| lines | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| => | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+21: expect(tokenizedLinesToMarkdown(lines)).toMatchSnapshot();↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| expect | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| tokenizedLinesToMarkdown | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| lines | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| .toMatchSnapshot | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ;↩ | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+22: });↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ;↩ | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+23: + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+24: function tokenize(text: string) {↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| function tokenize | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| text: string | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+25: return tokenizeText(text, gTypeScript);↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| return tokenizeText | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| text, gTypeScript | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ;↩ | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+26: }↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+27: });↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| } | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +" +`; + +exports[`visualizeAsMD tokenizedLinesToMarkdown file samples/sampleJest.ts 1`] = ` +"
+1: import { tokenizedLinesToMarkdown } from './visualizeAsMD';↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| import | source.ts | +| { | punctuation.meta.brace.ts code.ts source.ts | +| tokenizedLinesToMarkdown | meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts source.ts | +| from | source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ./visualizeAsMD | string.quoted.single.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+2: import { TypeScript } from '../grammars';↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| import | source.ts | +| { | punctuation.meta.brace.ts code.ts source.ts | +| TypeScript | meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts source.ts | +| from | source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ../grammars | string.quoted.single.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+3: import { normalizeGrammar } from '../parser/grammarNormalizer';↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| import | source.ts | +| { | punctuation.meta.brace.ts code.ts source.ts | +| normalizeGrammar | meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts source.ts | +| from | source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ../parser/grammarNormalizer | string.quoted.single.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+4: import { tokenizeText } from '../dist';↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| import | source.ts | +| { | punctuation.meta.brace.ts code.ts source.ts | +| tokenizeText | meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts source.ts | +| from | source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ../dist | string.quoted.single.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+5: + +| text | scope | +| --------- | -------------------------------------------------------- | +| | source.ts | + +
+ + +
+6: const sampleText = `↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| const sampleText = | source.ts | +| ` | punctuation.string.ts string.template.ts code.ts source.ts | +| | string.template.ts code.ts source.ts | + +
+ + +
+7: \${↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | string.template.ts code.ts source.ts | +| \${ | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | + +
+ + +
+8: '.'.repeat(22) + // Comment↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| . | string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| .repeat | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| 22 | meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| + | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| // | punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| Comment | comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | punctuation.definition.comment.ts comment.line.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | + +
+ + +
+9: { name: 'First' }.name↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| name: | meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| First | string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| .name↩ | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | + +
+ + +
+10: }↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| } | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts source.ts | +| | string.template.ts code.ts source.ts | + +
+ + +
+11: `;↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| ` | punctuation.string.ts string.template.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +
+12: + +| text | scope | +| --------- | -------------------------------------------------------- | +| | source.ts | + +
+ + +
+13: describe('visualizeAsMD', () => {↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| describe | source.ts | +| ( | punctuation.meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts | +| visualizeAsMD | string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts source.ts | +| , | meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| => | meta.brace.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+14: const gTypeScript = normalizeGrammar(TypeScript.grammar);↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| const gTypeScript = normalizeGrammar | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| TypeScript.grammar | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ;↩ | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+15: + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+16: test.each`↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| test.each | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ` | punctuation.string.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+17: lines↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| lines↩ | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+18: \${tokenize('')}↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| \${ | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| tokenize | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+19: \${tokenize('\tconst greeting = "hello";\n')}↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| \${ | punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| tokenize | meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| \t | constant.character.escape.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| const greeting = "hello"; | string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| \n | constant.character.escape.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.embedded.line.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | 
punctuation.definition.template.expression.ts meta.template.expression.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+20: `('tokenizedLinesToMarkdown', ({ lines }) => {↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ` | punctuation.string.ts string.template.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| tokenizedLinesToMarkdown | string.quoted.single.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ' | punctuation.string.ts string.quoted.single.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| , | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| lines | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| => | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+21: expect(tokenizedLinesToMarkdown(lines)).toMatchSnapshot();↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| expect | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| tokenizedLinesToMarkdown | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| lines | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| .toMatchSnapshot | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ;↩ | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+22: });↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ;↩ | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+23: + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+24: function tokenize(text: string) {↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| function tokenize | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| text: string | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| { | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+25: return tokenizeText(text, gTypeScript);↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| return tokenizeText | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ( | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| text, gTypeScript | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ;↩ | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+26: }↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| | meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| } | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| | meta.brace.ts code.ts meta.brace.ts code.ts source.ts | + +
+ + +
+27: });↩ + +| text | scope | +| --------- | -------------------------------------------------------- | +| } | punctuation.meta.brace.ts code.ts meta.brace.ts code.ts source.ts | +| ) | punctuation.meta.brace.ts code.ts source.ts | +| ;↩ | source.ts | + +
+ + +" +`; diff --git a/packages/cspell-grammar/src/viewer/escapeMarkdown.test.ts b/packages/cspell-grammar/src/viewer/escapeMarkdown.test.ts new file mode 100644 index 00000000000..eaf90e38578 --- /dev/null +++ b/packages/cspell-grammar/src/viewer/escapeMarkdown.test.ts @@ -0,0 +1,21 @@ +import { escapeHtml, escapeMarkdown } from './escapeMarkdown'; + +describe('escapeMarkdown', () => { + test.each` + text | expected + ${''} | ${''} + ${'/["\'&<>]/'} | ${'/["'&<>]/'} + ${'Good morning'} | ${'Good morning'} + `('escapeHtml $text', ({ text, expected }) => { + expect(escapeHtml(text)).toBe(expected); + }); + + test.each` + text | expected + ${/[-"'&<>`*_+[\]()\\|~]/g.toString()} | ${'/[-"'&<>`*_+[\]()\\|~]/g'} + ${'Good morning'} | ${'Good morning'} + ${'**bold**'} | ${'**bold**'} + `('escapeMarkdown $text', ({ text, expected }) => { + expect(escapeMarkdown(text)).toBe(expected); + }); +}); diff --git a/packages/cspell-grammar/src/viewer/escapeMarkdown.ts b/packages/cspell-grammar/src/viewer/escapeMarkdown.ts new file mode 100644 index 00000000000..b3382c614a4 --- /dev/null +++ b/packages/cspell-grammar/src/viewer/escapeMarkdown.ts @@ -0,0 +1,50 @@ +const matchHtml = /["'&<>]/g; +const matchMarkdown = /[-"'&<>`*_+[\]()\\|~]/g; + +const entityMap = { + '<': '<', + '>': '>', + '&': '&', + '"': '"', + "'": ''', +}; + +const charCodeToEntity: string[] = compileEntities(entityMap); + +export function escapeMarkdown(text: string): string { + return _escape(text, matchMarkdown); +} + +export function escapeHtml(str: string): string { + return _escape(str, matchHtml); +} + +function _escape(str: string, r: RegExp): string { + const cvt = charCodeToEntity; + + let lastIndex = 0; + let html = ''; + + r.lastIndex = 0; + + while (r.test(str)) { + const i = r.lastIndex - 1; + html += str.substring(lastIndex, i) + cvt[str.charCodeAt(i)]; + lastIndex = r.lastIndex; + } + return html + str.substring(lastIndex); +} + +function compileEntities(entityMap: Record): string[] { + const 
result: string[] = []; + result.length = 127; + + for (let i = 32; i < 128; ++i) { + result[i] = `&#${i};`; + } + + for (const [char, entity] of Object.entries(entityMap)) { + result[char.charCodeAt(0)] = entity; + } + return result; +} diff --git a/packages/cspell-grammar/src/viewer/markdownHelper.ts b/packages/cspell-grammar/src/viewer/markdownHelper.ts new file mode 100644 index 00000000000..2c0c11825bb --- /dev/null +++ b/packages/cspell-grammar/src/viewer/markdownHelper.ts @@ -0,0 +1,5 @@ +import { escapeMarkdown } from './escapeMarkdown'; + +export function toInlineCode(text: string): string { + return `${escapeMarkdown(text.replace(/\r/g, '↤').replace(/\n/g, '↩'))}`; +} diff --git a/packages/cspell-grammar/src/viewer/visualizeAsMD.test.ts b/packages/cspell-grammar/src/viewer/visualizeAsMD.test.ts new file mode 100644 index 00000000000..eab450cb7c5 --- /dev/null +++ b/packages/cspell-grammar/src/viewer/visualizeAsMD.test.ts @@ -0,0 +1,54 @@ +import { tokenizedLinesToMarkdown } from './visualizeAsMD'; +import { TypeScript } from '../grammars'; +import { normalizeGrammar } from '../parser/grammarNormalizer'; +import { tokenizeText } from '..'; +import * as path from 'path'; +import { promises as fs, readFileSync } from 'fs'; + +const pathPackage = path.join(__dirname, '../..'); +const pathSamples = path.join(pathPackage, 'samples'); +const pathTemp = path.join(pathPackage, 'temp'); + +const sampleTypescript = readFileSync(path.join(pathSamples, 'sampleJest.ts'), 'utf8'); + +describe('visualizeAsMD', () => { + const gTypeScript = normalizeGrammar(TypeScript.grammar); + + test.each` + lines + ${tokenize('')} + ${tokenize('\tconst greeting = "hello";\n')} + ${tokenize(sampleTypescript)} + `('tokenizedLinesToMarkdown', ({ lines }) => { + expect(tokenizedLinesToMarkdown(lines)).toMatchSnapshot(); + }); + + test.each` + filename + ${'samples/sampleJest.ts'} + `('tokenizedLinesToMarkdown file $filename', async ({ filename }) => { + filename = path.resolve(pathPackage, 
filename); + const text = await fs.readFile(filename, 'utf8'); + const lines = tokenize(text); + const md = tokenizedLinesToMarkdown(lines); + expect(md).toMatchSnapshot(); + const tempFilename = path.join(pathTemp, 'samples', path.basename(filename) + '.md'); + await writeFile(tempFilename, `# Tokenized: \`${path.basename(tempFilename)}\`\n\n${md}`); + }); + + function tokenize(text: string) { + return tokenizeText(text, gTypeScript); + } +}); + +async function mkdirForFile(filename: string) { + const dir = path.dirname(filename); + const parent = path.dirname(dir); + await fs.access(parent).catch(() => mkdirForFile(dir)); + return fs.access(dir).catch(() => fs.mkdir(dir)); +} + +async function writeFile(filename: string, content: string) { + await mkdirForFile(filename); + return fs.writeFile(filename, content, 'utf8'); +} diff --git a/packages/cspell-grammar/src/viewer/visualizeAsMD.ts b/packages/cspell-grammar/src/viewer/visualizeAsMD.ts new file mode 100644 index 00000000000..69f27a4252d --- /dev/null +++ b/packages/cspell-grammar/src/viewer/visualizeAsMD.ts @@ -0,0 +1,43 @@ +import type { TokenizedLine } from '../parser'; +import { toInlineCode } from './markdownHelper'; + +export function _tokenizedLineToMarkdown(line: TokenizedLine, indentation = ''): string { + const markdownLines: string[] = []; + + const header = `- \`${line.line.lineNumber + 1}\`: ${toInlineCode(line.line.text)} + + | text | scope | + | --------- | -------------------------------------------------------- |`; + + markdownLines.push(...header.split('\n')); + markdownLines.push(...line.tokens.map((t) => ` | ${toInlineCode(t.text)} | ${t.scope.join(' ')} |`)); + + return markdownLines.map((line) => indentation + line).join('\n') + '\n\n'; +} + +export function tokenizedLineToMarkdown(line: TokenizedLine, indentation = ''): string { + const rows = line.tokens.map((t) => `| ${toInlineCode(t.text)} | ${t.scope.join(' ')} |`); + + const detail = `
+${line.line.lineNumber + 1}: ${toInlineCode(line.line.text)} + +| text | scope | +| --------- | -------------------------------------------------------- | +${rows.join('\n')} + +
+`; + + const markdownLines = detail.split('\n'); + + return ( + markdownLines + .map((line) => indentation + line) + .map((line) => (line.trim() === '' ? '' : line)) + .join('\n') + '\n\n' + ); +} + +export function tokenizedLinesToMarkdown(lines: TokenizedLine[], indentation = ''): string { + return lines.map((line) => tokenizedLineToMarkdown(line, indentation)).join(''); +}