Skip to content

Commit

Permalink
Remove common WS prefix from fenced code blocks the same way I do for…
Browse files Browse the repository at this point in the history
… <pre>s. Fixes #2209.
  • Loading branch information
tabatkins committed Jan 13, 2022
1 parent 0fd7ae7 commit 53a8100
Show file tree
Hide file tree
Showing 24 changed files with 216 additions and 184 deletions.
52 changes: 42 additions & 10 deletions bikeshed/markdown/markdown.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import functools
import re
from itertools import *

Expand Down Expand Up @@ -110,7 +111,7 @@ def inlineElementStart(line):
opaqueElements += ["pre", "xmp", "script", "style"]
rawElements = "|".join(re.escape(x) for x in opaqueElements)

for line in lines:
for i, line in enumerate(lines):

# Three kinds of "raw" elements, which prevent markdown processing inside of them.
# 1. <pre> and manual opaque elements, which can contain markup and so can nest.
Expand All @@ -120,19 +121,16 @@ def inlineElementStart(line):
# The rawStack holds tokens like
# {"type":"fenced", "tag":"````", "nest":False}

# TODO: when i pop the last rawstack, collect all the raw tokens in sequence and remove their indentation. gonna need to track the index explicitly, since a raw might end on one line and start on the next again, so i can't just walk backwards.
if rawStack:
# Inside at least one raw element that turns off markdown.
# First see if this line will *end* the raw element.
endTag = rawStack[-1]
if endTag["type"] == "element" and re.search(endTag["tag"], line.text):
rawStack.pop()
tokens.append({"type": "raw", "prefixlen": float("inf"), "line": line})
continue
elif endTag["type"] == "fenced" and re.match(
r"\s*{}{}*\s*$".format(endTag["tag"], endTag["tag"][0]), line.text
):
if lineEndsRawBlock(line, endTag):
rawStack.pop()
line.text = "</xmp>"
if endTag["type"] == "fenced":
stripCommonWsPrefix(tokens[endTag["start"]+1:])
line.text = "</xmp>"
tokens.append({"type": "raw", "prefixlen": float("inf"), "line": line})
continue
elif not endTag["nest"]:
Expand All @@ -146,7 +144,7 @@ def inlineElementStart(line):
match = re.match(r"(\s*)(`{3,}|~{3,})([^`]*)$", line.text)
if match:
ws, tag, infoString = match.groups()
rawStack.append({"type": "fenced", "tag": tag, "nest": False})
rawStack.append({"type": "fenced", "tag": tag, "nest": False, "start":i})
infoString = infoString.strip()
if infoString:
# For now, I only care about lang
Expand Down Expand Up @@ -351,6 +349,40 @@ def stripPrefix(token, numSpacesForIndentation, len):
return text[offset:]


def lineEndsRawBlock(line, rawToken):
    """
    Report whether `line` closes the raw block described by `rawToken`.

    `rawToken` is a rawStack entry such as
    {"type": "fenced", "tag": "````", "nest": False}.

    * "element" tokens: the line closes the block if the token's tag
      (used as a regex) is found anywhere in the line's text.
    * "fenced" tokens: the line closes the block if it consists solely of
      the opening fence, optionally lengthened with more of the same fence
      character, surrounded only by whitespace.

    Returns the truthy regex match on success; otherwise a falsy value
    (False, or None for a fenced token that doesn't match).
    """
    kind = rawToken["type"]
    if kind == "element":
        # Normalize a failed search to False, as the combined boolean
        # expression this replaces did.
        return re.search(rawToken["tag"], line.text) or False
    if kind == "fenced":
        fence = rawToken["tag"]
        closingFence = r"\s*{}{}*\s*$".format(fence, fence[0])
        return re.match(closingFence, line.text)
    return False


def stripCommonWsPrefix(tokens):
    """
    Remove the longest common leading-whitespace prefix from the tokens' lines.

    Each token is a dict whose "line" value carries a mutable `.text`
    string. The lines are modified in place and the same list object
    that was passed in is returned.

    Whitespace-only lines are ignored when computing the shared prefix and
    are left untouched. Otherwise a single blank line inside an indented
    block (whose "prefix" is empty, or just its line ending) would collapse
    the common prefix to nothing and no indentation would be removed,
    and slicing a blank line could eat its line terminator.
    """
    if not tokens:
        return tokens
    # Only lines with visible content say anything about the indentation.
    contentTokens = [t for t in tokens if t["line"].text.strip()]
    if not contentTokens:
        # Nothing but blank lines: there is no indentation to remove.
        return tokens
    prefixes = [getWsPrefix(t["line"].text) for t in contentTokens]
    prefixLen = len(functools.reduce(commonPrefix, prefixes))
    for token in contentTokens:
        token["line"].text = token["line"].text[prefixLen:]
    return tokens


def commonPrefix(line1, line2):
    """
    Return the longest string that both `line1` and `line2` start with.

    The comparison is character by character and stops at the first
    mismatch or at the end of the shorter input, so the result is ""
    when the two share nothing (or either is empty).
    """
    shared = []
    # zip() stops at the shorter input, which bounds the prefix length.
    for left, right in zip(line1, line2):
        if left != right:
            break
        shared.append(left)
    return "".join(shared)


def getWsPrefix(line):
    """
    Return the run of whitespace at the start of `line`.

    The result is "" when the line starts with a non-whitespace character
    and the whole string when the line is entirely whitespace.
    """
    # \s* can match the empty string, so this match always succeeds.
    leading = re.match(r"(\s*)", line)
    return leading.group(1)


def parseTokens(tokens, numSpacesForIndentation):
"""
Token types:
Expand Down
6 changes: 3 additions & 3 deletions tests/github/WICG/container-queries/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -606,9 +606,9 @@ <h3 class="heading settled" data-level="1.3" id="usage"><span class="secno">1.3.
<p>[[Inside a CSS stylesheet, one can declare that sections apply to certain media types:]]</p>
<div class="example" id="example-1">
<a class="self-link" href="#example-1"></a>
<pre class="language-css highlight"> .element<c- nf>:media</c-><c- p>(</c-> min-width: <c- m>30</c-><c- k>em</c-> <c- p>)</c-> screen <c- p>{</c->
<c- p>}</c->
<pre class="language-css highlight">.element<c- nf>:media</c-><c- p>(</c-> min-width: <c- m>30</c-><c- k>em</c-> <c- p>)</c-> screen <c- p>{</c->

<c- p>}</c->
</pre>
</div>
<h2 class="heading settled" data-level="2" id="container-queries"><span class="secno">2. </span><span class="content">Container Queries</span><a class="self-link" href="#container-queries"></a></h2>
Expand Down
8 changes: 4 additions & 4 deletions tests/github/WICG/cookie-store/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -997,10 +997,10 @@ <h3 class="heading settled" data-level="1.5" id="intro-monitor"><span class="sec
the subscriptions that have been made.</p>
<div class="example" id="example-5e735d9a">
<a class="self-link" href="#example-5e735d9a"></a> Checking change subscriptions:
<pre class="language-js highlight"> <c- a>const</c-> subscriptions <c- o>=</c-> <c- k>await</c-> self<c- p>.</c->registration<c- p>.</c->cookies<c- p>.</c->getSubscriptions<c- p>();</c->
<c- k>for</c-> <c- p>(</c-><c- a>const</c-> sub <c- k>of</c-> subscriptions<c- p>)</c-> <c- p>{</c->
console<c- p>.</c->log<c- p>(</c->sub<c- p>.</c->name<c- p>,</c-> sub<c- p>.</c->url<c- p>);</c->
<c- p>}</c->
<pre class="language-js highlight"><c- a>const</c-> subscriptions <c- o>=</c-> <c- k>await</c-> self<c- p>.</c->registration<c- p>.</c->cookies<c- p>.</c->getSubscriptions<c- p>();</c->
<c- k>for</c-> <c- p>(</c-><c- a>const</c-> sub <c- k>of</c-> subscriptions<c- p>)</c-> <c- p>{</c->
console<c- p>.</c->log<c- p>(</c->sub<c- p>.</c->name<c- p>,</c-> sub<c- p>.</c->url<c- p>);</c->
<c- p>}</c->
</pre>
</div>
<h2 class="heading settled" data-level="2" id="concepts"><span class="secno">2. </span><span class="content">Concepts</span><a class="self-link" href="#concepts"></a></h2>
Expand Down
14 changes: 7 additions & 7 deletions tests/github/WICG/document-policy/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ <h4 class="heading settled" data-level="6.1.1" id="report-to-parameter"><span cl
reporting <a data-link-type="dfn" href="https://w3c.github.io/reporting/#endpoint" id="ref-for-endpoint">endpoint</a> to which violation reports for that feature will
be sent.</p>
<p>Example:</p>
<pre> Document-Policy: something=1.0;report-to=endpoint1, something-else=?0;report-to=endpoint2
<pre>Document-Policy: something=1.0;report-to=endpoint1, something-else=?0;report-to=endpoint2
</pre>
<h4 class="heading settled" data-level="6.1.2" id="reporting-default"><span class="secno">6.1.2. </span><span class="content">Setting the default reporting endpoint</span><a class="self-link" href="#reporting-default"></a></h4>
<p>If violations for many or all features should be sent to the same endpoint,
Expand All @@ -765,7 +765,7 @@ <h4 class="heading settled" data-level="6.1.2" id="reporting-default"><span clas
own endpoint, with the <code>report-to</code> parameter, then reports will go to that
endpoint instead. (Reports are not sent to both endpoints.)</p>
<p>Example:</p>
<pre> Document-Policy: something=1.0, something-else=?0, *;report-to=endpoint
<pre>Document-Policy: something=1.0, something-else=?0, *;report-to=endpoint
</pre>
<h4 class="heading settled" data-level="6.1.3" id="reporting-disable"><span class="secno">6.1.3. </span><span class="content">Disabling reporting for a feature</span><a class="self-link" href="#reporting-disable"></a></h4>
<p>If a default endpoint has been specified, then it may be necessary to
Expand All @@ -774,8 +774,8 @@ <h4 class="heading settled" data-level="6.1.3" id="reporting-disable"><span clas
This will override the default endpoint and disable reporting for that
feature.</p>
<p>Example:</p>
<pre> Document-Policy: something=1.0;report-to=none, something-else=?0,
*;report-to=endpoiont
<pre>Document-Policy: something=1.0;report-to=none, something-else=?0,
*;report-to=endpoiont
</pre>
</section>
<section>
Expand All @@ -789,8 +789,8 @@ <h3 class="heading settled" data-level="6.2" id="report-only"><span class="secno
<p>The <code>report-to</code> directive parameter should be used with directives in this
header, or else they will have no effect at all.</p>
<p>Example:</p>
<pre> Document-Policy-Report-Only: something=1.0;report-to=endpoint,
something-else=?0;report-to=endpoint2
<pre>Document-Policy-Report-Only: something=1.0;report-to=endpoint,
something-else=?0;report-to=endpoint2
</pre>
</section>
</section>
Expand Down Expand Up @@ -897,7 +897,7 @@ <h3 class="heading settled" data-level="8.1" id="integration-with-html"><span cl
<li data-md>
<p><code><a data-link-type="element" href="https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-iframe-element" id="ref-for-the-iframe-element①">iframe</a></code> elements should have the following IDL added:</p>
</ul>
<pre> [CEReactions] attribute DOMString policy;
<pre>[CEReactions] attribute DOMString policy;
</pre>
<ul>
<li data-md>
Expand Down
20 changes: 10 additions & 10 deletions tests/github/WICG/entries-api/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -971,12 +971,12 @@ <h2 class="heading settled" data-level="5" id="html-forms"><span class="secno">5
<aside class="example" id="example-d964aaf3">
<a class="self-link" href="#example-d964aaf3"></a> Inspecting the <code class="idl"><a data-link-type="idl" href="#dom-file-webkitrelativepath" id="ref-for-dom-file-webkitrelativepath⑥">webkitRelativePath</a></code> properties after a
directory is selected with an <code><a data-link-type="element" href="https://html.spec.whatwg.org/multipage/input.html#the-input-element" id="ref-for-the-input-element①">input</a></code> element:
<pre class="language-html highlight"> <c- p>&lt;</c-><c- f>input</c-> <c- e>id</c-><c- o>=</c-><c- s>b</c-> <c- e>type</c-><c- o>=</c-><c- s>file</c-> <c- e>webkitdirectory</c-><c- p>></c->
<pre class="language-html highlight"><c- p>&lt;</c-><c- f>input</c-> <c- e>id</c-><c- o>=</c-><c- s>b</c-> <c- e>type</c-><c- o>=</c-><c- s>file</c-> <c- e>webkitdirectory</c-><c- p>></c->
</pre>
<pre class="language-js highlight"> document<c- p>.</c->querySelector<c- p>(</c-><c- t>'#b'</c-><c- p>).</c->addEventListener<c- p>(</c-><c- t>'change'</c-><c- p>,</c-> e <c- p>=></c-> <c- p>{</c->
<c- k>for</c-> <c- p>(</c->file entry <c- k>of</c-> e<c- p>.</c->target<c- p>.</c->files<c- p>)</c->
console<c- p>.</c->log<c- p>(</c->file<c- p>.</c->name<c- p>,</c-> file<c- p>.</c->webkitRelativePath<c- p>);</c->
<c- p>});</c->
<pre class="language-js highlight">document<c- p>.</c->querySelector<c- p>(</c-><c- t>'#b'</c-><c- p>).</c->addEventListener<c- p>(</c-><c- t>'change'</c-><c- p>,</c-> e <c- p>=></c-> <c- p>{</c->
<c- k>for</c-> <c- p>(</c->file entry <c- k>of</c-> e<c- p>.</c->target<c- p>.</c->files<c- p>)</c->
console<c- p>.</c->log<c- p>(</c->file<c- p>.</c->name<c- p>,</c-> file<c- p>.</c->webkitRelativePath<c- p>);</c->
<c- p>});</c->
</pre>
</aside>
<p>The <code class="idl"><a data-link-type="idl" href="#dom-htmlinputelement-webkitentries" id="ref-for-dom-htmlinputelement-webkitentries">webkitEntries</a></code> IDL attribute allows scripts to
Expand All @@ -986,12 +986,12 @@ <h2 class="heading settled" data-level="5" id="html-forms"><span class="secno">5
it must instead return null.</p>
<aside class="example" id="example-e5cc7383">
<a class="self-link" href="#example-e5cc7383"></a> Enumerating entries using <code class="idl"><a data-link-type="idl" href="#dom-htmlinputelement-webkitentries" id="ref-for-dom-htmlinputelement-webkitentries①">webkitEntries</a></code>:
<pre class="language-html highlight"> <c- p>&lt;</c-><c- f>input</c-> <c- e>id</c-><c- o>=</c-><c- s>a</c-> <c- e>type</c-><c- o>=</c-><c- s>file</c-> <c- e>multiple</c-><c- p>></c->
<pre class="language-html highlight"><c- p>&lt;</c-><c- f>input</c-> <c- e>id</c-><c- o>=</c-><c- s>a</c-> <c- e>type</c-><c- o>=</c-><c- s>file</c-> <c- e>multiple</c-><c- p>></c->
</pre>
<pre class="language-js highlight"> document<c- p>.</c->querySelector<c- p>(</c-><c- t>'#a'</c-><c- p>).</c->addEventListener<c- p>(</c-><c- t>'change'</c-><c- p>,</c-> e <c- p>=></c-> <c- p>{</c->
<c- k>for</c-> <c- p>(</c-><c- a>const</c-> entry <c- k>of</c-> e<c- p>.</c->target<c- p>.</c->webkitEntries<c- p>)</c->
handleEntry<c- p>(</c->entry<c- p>);</c->
<c- p>});</c->
<pre class="language-js highlight">document<c- p>.</c->querySelector<c- p>(</c-><c- t>'#a'</c-><c- p>).</c->addEventListener<c- p>(</c-><c- t>'change'</c-><c- p>,</c-> e <c- p>=></c-> <c- p>{</c->
<c- k>for</c-> <c- p>(</c-><c- a>const</c-> entry <c- k>of</c-> e<c- p>.</c->target<c- p>.</c->webkitEntries<c- p>)</c->
handleEntry<c- p>(</c->entry<c- p>);</c->
<c- p>});</c->
</pre>
</aside>
<aside class="issue" id="issue-4cc238bb"><a class="self-link" href="#issue-4cc238bb"></a> INTEROP:
Expand Down
2 changes: 1 addition & 1 deletion tests/github/WICG/portals/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -1764,7 +1764,7 @@ <h3 class="heading settled" data-level="5.2" id="fetch-metadata"><span class="se
</section>
<div class="note" role="note">
The effect of this is that the request for a document in a <a data-link-type="dfn" href="#portal-browsing-context" id="ref-for-portal-browsing-context①⑦">portal browsing context</a> will contain the following HTTP header, as though it were in a <a data-link-type="dfn" href="https://html.spec.whatwg.org/multipage/browsers.html#nested-browsing-context" id="ref-for-nested-browsing-context⑥">nested browsing context</a>.
<pre> Sec-Fetch-Mode: nested-navigate
<pre>Sec-Fetch-Mode: nested-navigate
</pre>
</div>
<div class="note" role="note"> Per the existing processing model, the other fetch metadata headers will automatically have the
Expand Down
4 changes: 2 additions & 2 deletions tests/github/WICG/sanitizer-api/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -750,8 +750,8 @@ <h3 class="heading settled" data-level="2.1" id="sanitizer-api"><span class="sec
<p>The <dfn class="dfn-paneled idl-code" data-dfn-for="Sanitizer" data-dfn-type="method" data-export id="dom-sanitizer-sanitizetostring"><code>sanitizeToString(<var>input</var>)</code></dfn> method steps are to return the result of running <a data-link-type="dfn" href="#sanitizetostring" id="ref-for-sanitizetostring">sanitizeToString</a> algorithm on <var>input</var>.</p>
</ul>
<p>Example:</p>
<pre class="language-js highlight"> <c- c1>// Replace an element’s content from unsanitized input:</c->
element<c- p>.</c->replaceChildren<c- p>(</c-><c- k>new</c-> Sanitizer<c- p>().</c->sanitize<c- p>(</c->userControlledInput<c- p>));</c->
<pre class="language-js highlight"><c- c1>// Replace an element’s content from unsanitized input:</c->
element<c- p>.</c->replaceChildren<c- p>(</c-><c- k>new</c-> Sanitizer<c- p>().</c->sanitize<c- p>(</c->userControlledInput<c- p>));</c->
</pre>
<h3 class="heading settled" data-level="2.2" id="inputs"><span class="secno">2.2. </span><span class="content">Input Types</span><a class="self-link" href="#inputs"></a></h3>
<p>The sanitization methods support three input types: <code>DOMString</code>, <code>Document</code>,
Expand Down
54 changes: 27 additions & 27 deletions tests/github/WICG/scroll-to-text-fragment/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -1061,27 +1061,27 @@ <h4 class="heading settled" data-level="3.3.1" id="processing-the-fragment-direc
</div>
<div class="example" id="example-609a3312">
<a class="self-link" href="#example-609a3312"></a>
<pre> window.location = 'https://example.com#foo:~:bar';
<pre>window.location = 'https://example.com#foo:~:bar';
</pre>
<p>The page loads and when the document’s URL is set the fragment directive is
stripped out during the "create and initialize a Document object" steps.</p>
<pre> console.log(window.location.href); // 'https://example.com#foo'
console.log(window.location.hash); // '#foo'
<pre>console.log(window.location.href); // 'https://example.com#foo'
console.log(window.location.hash); // '#foo'
</pre>
<p>Since same document navigations are made by adding a new session history
entry and using the "traverse the history" steps, the the fragment directive
will be stripped here as well.</p>
<pre> window.location.hash = 'fizz:~:buzz';
console.log(window.location.href); // 'https://example.com#fizz'
console.log(window.location.hash); // '#fizz'
<pre>window.location.hash = 'fizz:~:buzz';
console.log(window.location.href); // 'https://example.com#fizz'
console.log(window.location.hash); // '#fizz'
</pre>
<p>The hashchange event is dispatched when only the fragment directive changes
because the comparison for it is done on the URLs in the session history
entries, where the fragment directive hasn’t been removed.</p>
<pre> onhashchange = () => {console.log('HASHCHANGE');};
window.location.hash = 'fizz:~:zillch'; // 'HASHCHANGE'
console.log(window.location.href); // 'https://example.com#fizz'
console.log(window.location.hash); // '#fizz'
<pre>onhashchange = () => {console.log('HASHCHANGE');};
window.location.hash = 'fizz:~:zillch'; // 'HASHCHANGE'
console.log(window.location.href); // 'https://example.com#fizz'
console.log(window.location.hash); // '#fizz'
</pre>
</div>
<div class="example" id="example-42953739">
Expand All @@ -1098,11 +1098,11 @@ <h4 class="heading settled" data-level="3.3.1" id="processing-the-fragment-direc

</pre>
<p>The <code>&lt;a></code> or <code>&lt;area></code> elements:</p>
<pre> &lt;a id='anchor' href="https://example.com#foo:~:bar">Anchor&lt;/a>
&lt;script>
console.log(anchor.href); // 'https://example.com#foo:~:bar'
console.log(anchor.hash); // '#foo:~:bar'
&lt;/script>
<pre>&lt;a id='anchor' href="https://example.com#foo:~:bar">Anchor&lt;/a>
&lt;script>
console.log(anchor.href); // 'https://example.com#foo:~:bar'
console.log(anchor.hash); // '#foo:~:bar'
&lt;/script>
</pre>
</div>
<div class="example" id="example-c1edb88d">
Expand All @@ -1111,10 +1111,10 @@ <h4 class="heading settled" data-level="3.3.1" id="processing-the-fragment-direc
cause it to set its URL on the document which will process the fragment
directive before setting it on the Document (but the fragment directive
remains on the entry).
<pre> history.pushState({}, 'title', 'index.html#foo:~:bar');
window.location = 'newpage.html';
// on newpage.html
history.back();
<pre>history.pushState({}, 'title', 'index.html#foo:~:bar');
window.location = 'newpage.html';
// on newpage.html
history.back();
</pre>
<p>Results in the current document having "bar" as the fragment directive.</p>
</div>
Expand Down Expand Up @@ -1716,15 +1716,15 @@ <h4 class="heading settled" data-level="3.5.2" id="finding-ranges-in-a-document"
<a class="self-link" href="#example-73638554"></a>
<pre>:~:text=The quick,lazy dog</pre>
will fail to match in
<pre> &lt;div>The&lt;div> &lt;/div>quick brown fox&lt;/div>
&lt;div>jumped over the lazy dog&lt;/div>
<pre>&lt;div>The&lt;div> &lt;/div>quick brown fox&lt;/div>
&lt;div>jumped over the lazy dog&lt;/div>
</pre>
<p>because the starting string "The quick" does not appear within a single,
uninterrupted block. The instance of "The quick" in the document has a
block element between "The" and "quick".</p>
<p>It does, however, match in this example:</p>
<pre> &lt;div>The quick brown fox&lt;/div>
&lt;div>jumped over the lazy dog&lt;/div>
<pre>&lt;div>The quick brown fox&lt;/div>
&lt;div>jumped over the lazy dog&lt;/div>
</pre>
</div>
</div>
Expand Down Expand Up @@ -1899,9 +1899,9 @@ <h4 class="heading settled" data-level="3.5.2" id="finding-ranges-in-a-document"
into a single string in which we can search, using the node list to
determine offsets with a node so we can return a <a data-link-type="dfn" href="https://dom.spec.whatwg.org/#concept-range" id="ref-for-concept-range②①">range</a>. </p>
<p> Collection breaks when we hit a block node, e.g. searching over this tree: </p>
<pre> &lt;div>
a&lt;em>b&lt;/em>c&lt;div>d&lt;/div>e
&lt;/div>
<pre>&lt;div>
a&lt;em>b&lt;/em>c&lt;div>d&lt;/div>e
&lt;/div>
</pre>
<p></p>
<p>Will perform a search on "abc", then on "d", then on "e".</p>
Expand Down Expand Up @@ -2247,7 +2247,7 @@ <h3 class="heading settled" data-level="3.7" id="document-policy-integration"><s
<div class="example" id="example-ae692635">
<a class="self-link" href="#example-ae692635"></a> Suppose the user navigates to <code>https://example.com#:~:text=foo</code>. The
example.com server response includes the header:
<pre> Document-Policy: force-load-at-top
<pre>Document-Policy: force-load-at-top
</pre>
<p>When the page loads, the element containing "foo" will be marked as the
indicated part of the document and set as the document’s target element.
Expand Down
Loading

0 comments on commit 53a8100

Please sign in to comment.