Skip to content

Commit

Permalink
Fix parsing of inline code blocks with multiple backticks (#260)
Browse files Browse the repository at this point in the history
Fix parsing of inline code blocks with multiple backticks
  • Loading branch information
srawlins authored Sep 13, 2019
1 parent bcda511 commit b677ece
Show file tree
Hide file tree
Showing 10 changed files with 44 additions and 34 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* Improve strict spec compliance for `code` elements defined with "\`".
* Properly encode `<`, `>`, and `"` as their respective HTML entities when
interpreted as text.
* Improve inline code parsing when using multiple backticks.

## 2.0.3

Expand Down
23 changes: 19 additions & 4 deletions lib/src/block_parser.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'package:charcode/charcode.dart';

import 'ast.dart';
import 'document.dart';
import 'util.dart';
Expand All @@ -25,7 +27,7 @@ final _blockquotePattern = RegExp(r'^[ ]{0,3}>[ ]?(.*)$');
final _indentPattern = RegExp(r'^(?: | {0,3}\t)(.*)$');

/// Fenced code block.
final _codePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$');
final _codeFencePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$');

/// Three or more hyphens, asterisks or underscores by themselves. Note that
/// a line like `----` is valid as both HR and SETEXT. In case of a tie,
Expand Down Expand Up @@ -265,7 +267,7 @@ class SetextHeaderSyntax extends BlockSyntax {

bool _interperableAsParagraph(String line) =>
!(_indentPattern.hasMatch(line) ||
_codePattern.hasMatch(line) ||
_codeFencePattern.hasMatch(line) ||
_headerPattern.hasMatch(line) ||
_blockquotePattern.hasMatch(line) ||
_hrPattern.hasMatch(line) ||
Expand Down Expand Up @@ -404,12 +406,25 @@ class CodeBlockSyntax extends BlockSyntax {

/// Parses preformatted code blocks between two ~~~ or ``` sequences.
///
/// See [Pandoc's documentation](http://pandoc.org/README.html#fenced-code-blocks).
/// See the CommonMark spec: https://spec.commonmark.org/0.29/#fenced-code-blocks
class FencedCodeBlockSyntax extends BlockSyntax {
RegExp get pattern => _codePattern;
RegExp get pattern => _codeFencePattern;

const FencedCodeBlockSyntax();

/// Whether `parser.current` can start a fenced code block.
///
/// The line must match [pattern]. In addition, when the fence is made of
/// backticks, the text following the fence (the info string) must not
/// contain a backtick; other fences carry no such restriction.
bool canParse(BlockParser parser) {
  final fenceMatch = pattern.firstMatch(parser.current);
  if (fenceMatch == null) {
    return false;
  }

  // Group 1 is the fence itself; group 2 is everything after it on the line.
  final fence = fenceMatch.group(1);
  final info = fenceMatch.group(2);

  // Only the first code unit is inspected to tell which kind of fence this
  // is. Anything other than a backtick fence is accepted as-is.
  if (fence.codeUnitAt(0) != $backquote) {
    return true;
  }

  // From the CommonMark spec:
  //
  // > If the info string comes after a backtick fence, it may not contain
  // > any backtick characters.
  return !info.codeUnits.contains($backquote);
}

List<String> parseChildLines(BlockParser parser, [String endBlock]) {
if (endBlock == null) endBlock = '';

Expand Down
3 changes: 1 addition & 2 deletions test/common_mark/code_spans.unit
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,7 @@ baz`
>>> Code spans - 347
```foo``
<<<
<pre><code class="language-foo``">
</code></pre>
<p>```foo``</p>
>>> Code spans - 348
`foo
<<<
Expand Down
10 changes: 4 additions & 6 deletions test/common_mark/fenced_code_blocks.unit
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,8 @@ aaa
``` ```
aaa
<<<
<pre><code class="language-```">aaa

</code></pre>
<p><code></code>
aaa</p>
>>> Fenced code blocks - 109
~~~~~~
aaa
Expand Down Expand Up @@ -232,9 +231,8 @@ end
``` aa ```
foo
<<<
<pre><code class="language-aa">foo

</code></pre>
<p><code>aa</code>
foo</p>
>>> Fenced code blocks - 116
~~~ aa ``` ~~~
foo
Expand Down
3 changes: 1 addition & 2 deletions test/gfm/code_spans.unit
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,7 @@ baz`
>>> Code spans - 357
```foo``
<<<
<pre><code class="language-foo``">
</code></pre>
<p>```foo``</p>
>>> Code spans - 358
`foo
<<<
Expand Down
10 changes: 4 additions & 6 deletions test/gfm/fenced_code_blocks.unit
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,8 @@ aaa
``` ```
aaa
<<<
<pre><code class="language-```">aaa

</code></pre>
<p><code></code>
aaa</p>
>>> Fenced code blocks - 109
~~~~~~
aaa
Expand Down Expand Up @@ -232,9 +231,8 @@ end
``` aa ```
foo
<<<
<pre><code class="language-aa">foo

</code></pre>
<p><code>aa</code>
foo</p>
>>> Fenced code blocks - 116
~~~ aa ``` ~~~
foo
Expand Down
6 changes: 3 additions & 3 deletions tool/common_mark_stats.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
"344": "strict",
"345": "strict",
"346": "strict",
"347": "fail",
"347": "strict",
"348": "strict",
"349": "strict"
},
Expand Down Expand Up @@ -281,14 +281,14 @@
"105": "strict",
"106": "strict",
"107": "loose",
"108": "fail",
"108": "loose",
"109": "loose",
"110": "strict",
"111": "strict",
"112": "strict",
"113": "strict",
"114": "strict",
"115": "fail",
"115": "strict",
"116": "strict",
"117": "fail"
},
Expand Down
8 changes: 4 additions & 4 deletions tool/common_mark_stats.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
12 of 13 – 92.3% Backslash escapes
1 of 1 – 100.0% Blank lines
22 of 25 – 88.0% Block quotes
21 of 22 – 95.5% Code spans
22 of 22 – 100.0% Code spans
125 of 131 – 95.4% Emphasis and strong emphasis
14 of 17 – 82.4% Entity and numeric character references
26 of 29 – 89.7% Fenced code blocks
28 of 29 – 96.6% Fenced code blocks
15 of 15 – 100.0% Hard line breaks
43 of 43 – 100.0% HTML blocks
21 of 22 – 95.5% Images
Expand All @@ -24,5 +24,5 @@
11 of 11 – 100.0% Tabs
3 of 3 – 100.0% Textual content
19 of 19 – 100.0% Thematic breaks
598 of 649 – 92.1% TOTAL
525 of 598 – 87.8% TOTAL Strict
601 of 649 – 92.6% TOTAL
527 of 601 – 87.7% TOTAL Strict
6 changes: 3 additions & 3 deletions tool/gfm_stats.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
"354": "strict",
"355": "strict",
"356": "strict",
"357": "fail",
"357": "strict",
"358": "strict",
"359": "strict"
},
Expand Down Expand Up @@ -297,14 +297,14 @@
"105": "strict",
"106": "strict",
"107": "loose",
"108": "fail",
"108": "loose",
"109": "loose",
"110": "strict",
"111": "strict",
"112": "strict",
"113": "strict",
"114": "strict",
"115": "fail",
"115": "strict",
"116": "strict",
"117": "fail"
},
Expand Down
8 changes: 4 additions & 4 deletions tool/gfm_stats.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
12 of 13 – 92.3% Backslash escapes
1 of 1 – 100.0% Blank lines
22 of 25 – 88.0% Block quotes
21 of 22 – 95.5% Code spans
22 of 22 – 100.0% Code spans
0 of 1 – 0.0% Disallowed Raw HTML (extension)
125 of 131 – 95.4% Emphasis and strong emphasis
14 of 17 – 82.4% Entity and numeric character references
26 of 29 – 89.7% Fenced code blocks
28 of 29 – 96.6% Fenced code blocks
15 of 15 – 100.0% Hard line breaks
43 of 43 – 100.0% HTML blocks
21 of 22 – 95.5% Images
Expand All @@ -28,5 +28,5 @@
11 of 11 – 100.0% Tabs
3 of 3 – 100.0% Textual content
19 of 19 – 100.0% Thematic breaks
613 of 671 – 91.4% TOTAL
531 of 613 – 86.6% TOTAL Strict
616 of 671 – 91.8% TOTAL
533 of 616 – 86.5% TOTAL Strict

0 comments on commit b677ece

Please sign in to comment.