From 9ed6456a37648d6944e549e04e7e441686dd6c6f Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Tue, 3 Sep 2024 18:06:32 -0600 Subject: [PATCH] fix: Don't replace tabs with spaces (#3438) * fix: don't convert tabs to spaces * test exact * save nextLineWithoutTabs * fix code --- src/Lexer.ts | 4 ---- src/Tokenizer.ts | 20 ++++++++++++-------- src/rules.ts | 16 ++++++++-------- test/specs/new/tabs_code.html | 2 ++ test/specs/new/tabs_code.md | 6 ++++++ 5 files changed, 28 insertions(+), 20 deletions(-) create mode 100644 test/specs/new/tabs_code.html create mode 100644 test/specs/new/tabs_code.md diff --git a/src/Lexer.ts b/src/Lexer.ts index d600922bd7..9aa79cdbf9 100644 --- a/src/Lexer.ts +++ b/src/Lexer.ts @@ -106,10 +106,6 @@ export class _Lexer { blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) { if (this.options.pedantic) { src = src.replace(/\t/g, ' ').replace(/^ +$/gm, ''); - } else { - src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => { - return leading + ' '.repeat(tabs.length); - }); } let token: Tokens.Generic | undefined; diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts index 5ad91c61e1..80fc59ec7a 100644 --- a/src/Tokenizer.ts +++ b/src/Tokenizer.ts @@ -90,7 +90,7 @@ export class _Tokenizer { code(src: string): Tokens.Code | undefined { const cap = this.rules.block.code.exec(src); if (cap) { - const text = cap[0].replace(/^ {1,4}/gm, ''); + const text = cap[0].replace(/^(?: {1,4}| {0,3}\t)/gm, ''); return { type: 'code', raw: cap[0], @@ -294,7 +294,7 @@ export class _Tokenizer { indent += cap[1].length; } - if (blankLine && /^ *$/.test(nextLine)) { // Items begin with at most one blank line + if (blankLine && /^[ \t]*$/.test(nextLine)) { // Items begin with at most one blank line raw += nextLine + '\n'; src = src.substring(nextLine.length + 1); endEarly = true; @@ -309,11 +309,15 @@ export class _Tokenizer { // Check if following lines should be included in List Item while (src) { const rawLine = src.split('\n', 1)[0]; + 
let nextLineWithoutTabs; nextLine = rawLine; // Re-align to follow commonmark nesting rules if (this.options.pedantic) { nextLine = nextLine.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' '); + nextLineWithoutTabs = nextLine; + } else { + nextLineWithoutTabs = nextLine.replace(/\t/g, ' '); } // End list item if found code fences @@ -332,12 +336,12 @@ export class _Tokenizer { } // Horizontal rule found - if (hrRegex.test(src)) { + if (hrRegex.test(nextLine)) { break; } - if (nextLine.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible - itemContents += '\n' + nextLine.slice(indent); + if (nextLineWithoutTabs.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible + itemContents += '\n' + nextLineWithoutTabs.slice(indent); } else { // not enough indentation if (blankLine) { @@ -345,7 +349,7 @@ export class _Tokenizer { } // paragraph continuation unless last line was a different block level element - if (line.search(/[^ ]/) >= 4) { // indented code block + if (line.replace(/\t/g, ' ').search(/[^ ]/) >= 4) { // indented code block break; } if (fencesBeginRegex.test(line)) { @@ -367,7 +371,7 @@ export class _Tokenizer { raw += rawLine + '\n'; src = src.substring(rawLine.length + 1); - line = nextLine.slice(indent); + line = nextLineWithoutTabs.slice(indent); } } @@ -375,7 +379,7 @@ export class _Tokenizer { // If the previous item ended with a blank line, the list is loose if (endsWithBlankLine) { list.loose = true; - } else if (/\n *\n *$/.test(raw)) { + } else if (/\n[ \t]*\n[ \t]*$/.test(raw)) { endsWithBlankLine = true; } } diff --git a/src/rules.ts b/src/rules.ts index d6f330ed39..83bc8b3f1f 100644 --- a/src/rules.ts +++ b/src/rules.ts @@ -6,15 +6,15 @@ import { * Block-Level Grammar */ -const newline = /^(?: *(?:\n|$))+/; -const blockCode = /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/; +const newline = /^(?:[ \t]*(?:\n|$))+/; +const blockCode = /^((?: {4}| {0,3}\t)[^\n]+(?:\n(?:[ \t]*(?:\n|$))*)?)+/; const fences = /^ 
{0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/; const hr = /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/; const heading = /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/; const bullet = /(?:[*+-]|\d{1,9}[.)])/; const lheading = edit(/^(?!bull |blockCode|fences|blockquote|heading|html)((?:.|\n(?!\s*?\n|bull |blockCode|fences|blockquote|heading|html))+?)\n {0,3}(=+|-+) *(?:\n+|$)/) .replace(/bull/g, bullet) // lists can interrupt - .replace(/blockCode/g, / {4}/) // indented code blocks can interrupt + .replace(/blockCode/g, /(?: {4}| {0,3}\t)/) // indented code blocks can interrupt .replace(/fences/g, / {0,3}(?:`{3,}|~{3,})/) // fenced code blocks can interrupt .replace(/blockquote/g, / {0,3}>/) // blockquote can interrupt .replace(/heading/g, / {0,3}#{1,6}/) // ATX heading can interrupt @@ -23,7 +23,7 @@ const lheading = edit(/^(?!bull |blockCode|fences|blockquote|heading|html)((?:.| const _paragraph = /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/; const blockText = /^[^\n]+/; const _blockLabel = /(?!\s*\])(?:\\.|[^\[\]\\])+/; -const def = edit(/^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/) +const def = edit(/^ {0,3}\[(label)\]: *(?:\n[ \t]*)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n[ \t]*)?| *\n[ \t]*)(title))? *(?:\n+|$)/) .replace('label', _blockLabel) .replace('title', /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/) .getRegex(); @@ -46,9 +46,9 @@ const html = edit( + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3) + '|\\n*|$)' // (4) + '|\\n*|$)' // (5) -+ '|)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6) -+ '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? 
*/?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag -+ '|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag ++ '|)[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (6) ++ '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) open tag ++ '|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) closing tag + ')', 'i') .replace('comment', _comment) .replace('tag', _tag) @@ -104,7 +104,7 @@ const gfmTable = edit( .replace('hr', hr) .replace('heading', ' {0,3}#{1,6}(?:\\s|$)') .replace('blockquote', ' {0,3}>') - .replace('code', ' {4}[^\\n]') + .replace('code', '(?: {4}| {0,3}\t)[^\\n]') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|textarea|!--)') diff --git a/test/specs/new/tabs_code.html b/test/specs/new/tabs_code.html new file mode 100644 index 0000000000..f8752e19f9 --- /dev/null +++ b/test/specs/new/tabs_code.html @@ -0,0 +1,2 @@ +
<pre><code>	tab
+</code></pre>
diff --git a/test/specs/new/tabs_code.md b/test/specs/new/tabs_code.md new file mode 100644 index 0000000000..1689e98aa1 --- /dev/null +++ b/test/specs/new/tabs_code.md @@ -0,0 +1,6 @@ +--- +renderExact: true +--- +``` + tab +```