Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug 27 of #17340 #19433

Merged
merged 1 commit into from
Feb 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 48 additions & 18 deletions lib/packages/docutils/rst.nim
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@

import
os, strutils, rstast, dochelpers, std/enumutils, algorithm, lists, sequtils,
std/private/miscdollars, tables
std/private/miscdollars, tables, strscans
from highlite import SourceLanguage, getSourceLanguage

type
Expand Down Expand Up @@ -1347,15 +1347,36 @@ proc match(p: RstParser, start: int, expr: string): bool =
inc i
result = true

proc fixupEmbeddedRef(n, a, b: PRstNode) =
proc safeProtocol*(linkStr: var string): string =
# Returns link's protocol and, if it's not safe, clears `linkStr`
result = ""
if scanf(linkStr, "$w:", result):
# if it has a protocol at all, ensure that it's not 'javascript:' or worse:
if cmpIgnoreCase(result, "http") == 0 or
cmpIgnoreCase(result, "https") == 0 or
cmpIgnoreCase(result, "ftp") == 0:
discard "it's fine"
Comment on lines +1357 to +1358
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What determines whether a protocol is "trusted"? That is, why not "tel" (commonly used in telephone numbers), "ssh", etc.?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is related to URL-parsing. If you have tel/ssh URLs, you cannot write them as [clickme](url) anymore. That's perfectly ok. Alternatively only "javascript" links are really the harmful ones but I prefer the whitelist approach here.

else:
linkStr = ""

proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool =
# Returns `true` if the link belongs to an allowed protocol
var sep = - 1
for i in countdown(n.len - 2, 0):
if n.sons[i].text == "<":
sep = i
break
var incr = if sep > 0 and n.sons[sep - 1].text[0] == ' ': 2 else: 1
for i in countup(0, sep - incr): a.add(n.sons[i])
for i in countup(sep + 1, n.len - 2): b.add(n.sons[i])
var linkStr = ""
for i in countup(sep + 1, n.len - 2): linkStr.add(n.sons[i].addNodes)
if linkStr != "":
let protocol = safeProtocol(linkStr)
result = linkStr != ""
if not result:
rstMessage(p, mwBrokenLink, protocol,
p.tok[p.idx-3].line, p.tok[p.idx-3].col)
b.add newLeaf(linkStr)

proc whichRole(p: RstParser, sym: string): RstNodeKind =
result = whichRoleAux(sym)
Expand Down Expand Up @@ -1407,14 +1428,17 @@ proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">":
var a = newRstNode(rnInner)
var b = newRstNode(rnInner)
fixupEmbeddedRef(n, a, b)
if a.len == 0:
newKind = rnStandaloneHyperlink
newSons = @[b]
else:
newKind = rnHyperlink
newSons = @[a, b]
setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias)
if fixupEmbeddedRef(p, n, a, b):
if a.len == 0: # e.g. `<a_named_relative_link>`_
newKind = rnStandaloneHyperlink
newSons = @[b]
else: # e.g. `link title <http://site>`_
newKind = rnHyperlink
newSons = @[a, b]
setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias)
else: # include as plain text, not a link
newKind = rnInner
newSons = n.sons
result = newRstNode(newKind, newSons)
else: # some link that will be resolved in `resolveSubs`
newKind = rnRef
Expand Down Expand Up @@ -1623,7 +1647,6 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
result.add(lb)

proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
result = true
var desc, link = ""
var i = p.idx

Expand All @@ -1642,14 +1665,21 @@ proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =

parse("]", desc)
if p.tok[i].symbol != "(": return false
let linkIdx = i + 1
parse(")", link)
let child = newRstNode(rnHyperlink)
child.add desc
child.add link
# only commit if we detected no syntax error:
father.add child
p.idx = i
result = true
let protocol = safeProtocol(link)
if link == "":
result = false
rstMessage(p, mwBrokenLink, protocol,
p.tok[linkIdx].line, p.tok[linkIdx].col)
else:
let child = newRstNode(rnHyperlink)
child.add desc
child.add link
father.add child
p.idx = i
result = true

proc getFootnoteType(label: PRstNode): (FootnoteType, int) =
if label.sons.len >= 1 and label.sons[0].kind == rnLeaf and
Expand Down
17 changes: 3 additions & 14 deletions lib/packages/docutils/rstgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
## can be done by simply searching for [footnoteName].

import strutils, os, hashes, strtabs, rstast, rst, highlite, tables, sequtils,
algorithm, parseutils, std/strbasics, strscans
algorithm, parseutils, std/strbasics

import ../../std/private/since

Expand Down Expand Up @@ -826,17 +826,6 @@ proc renderOverline(d: PDoc, n: PRstNode, result: var string) =
"\\rstov$4[$5]{$3}$2\n", [$n.level,
rstnodeToRefname(n).idS, tmp, $chr(n.level - 1 + ord('A')), tocName])


proc safeProtocol(linkStr: var string) =
var protocol = ""
if scanf(linkStr, "$w:", protocol):
# if it has a protocol at all, ensure that it's not 'javascript:' or worse:
if cmpIgnoreCase(protocol, "http") == 0 or cmpIgnoreCase(protocol, "https") == 0 or
cmpIgnoreCase(protocol, "ftp") == 0:
discard "it's fine"
else:
linkStr = ""

proc renderTocEntry(d: PDoc, e: TocEntry, result: var string) =
dispA(d.target, result,
"<li><a class=\"reference\" id=\"$1_toc\" href=\"#$1\">$2</a></li>\n",
Expand Down Expand Up @@ -901,7 +890,7 @@ proc renderImage(d: PDoc, n: PRstNode, result: var string) =

# support for `:target:` links for images:
var target = esc(d.target, getFieldValue(n, "target").strip(), escMode=emUrl)
safeProtocol(target)
discard safeProtocol(target)

if target.len > 0:
# `htmlOut` needs to be of the following format for link to work for images:
Expand Down Expand Up @@ -1205,7 +1194,7 @@ proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string,
d.escMode = emUrl
renderRstToOut(d, link, linkStr)
d.escMode = mode
safeProtocol(linkStr)
discard safeProtocol(linkStr)
var textStr = ""
renderRstToOut(d, text, textStr)
let nimDocStr = if nimdoc: " nimdoc" else: ""
Expand Down
27 changes: 21 additions & 6 deletions tests/stdlib/trst.nim
Original file line number Diff line number Diff line change
Expand Up @@ -843,12 +843,7 @@ suite "Warnings":
rnInner
rnLeaf 'foo'
rnInner
rnLeaf '#'
rnLeaf 'foo'
rnLeaf ','
rnLeaf 'string'
rnLeaf ','
rnLeaf 'string'
rnLeaf '#foo,string,string'
rnParagraph anchor='foo'
rnLeaf 'Paragraph'
rnLeaf '.'
Expand Down Expand Up @@ -1256,3 +1251,23 @@ suite "RST inline markup":
rnLeaf 'my {link example'
rnLeaf 'http://example.com/bracket_(symbol_[)'
""")

test "not a Markdown link":
# bug #17340 (27) `f` will be considered as a protocol and blocked as unsafe
var warnings = new seq[string]
check("[T](f: var Foo)".toAst(warnings = warnings) ==
dedent"""
rnInner
rnLeaf '['
rnLeaf 'T'
rnLeaf ']'
rnLeaf '('
rnLeaf 'f'
rnLeaf ':'
rnLeaf ' '
rnLeaf 'var'
rnLeaf ' '
rnLeaf 'Foo'
rnLeaf ')'
""")
check(warnings[] == @["input(1, 5) Warning: broken link 'f'"])
11 changes: 9 additions & 2 deletions tests/stdlib/trstgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1593,8 +1593,15 @@ suite "invalid targets":
test "invalid links":
check("(([Nim](https://nim-lang.org/)))".toHtml ==
"""((<a class="reference external" href="https://nim-lang.org/">Nim</a>))""")
check("(([Nim](javascript://nim-lang.org/)))".toHtml ==
"""((<a class="reference external" href="">Nim</a>))""")
# unknown protocol is treated just like plain text, not a link
var warnings = new seq[string]
check("(([Nim](javascript://nim-lang.org/)))".toHtml(warnings=warnings) ==
"""(([Nim](javascript://nim-lang.org/)))""")
check(warnings[] == @["input(1, 9) Warning: broken link 'javascript'"])
warnings[].setLen 0
check("`Nim <javascript://nim-lang.org/>`_".toHtml(warnings=warnings) ==
"""Nim &lt;javascript://nim-lang.org/&gt;""")
check(warnings[] == @["input(1, 33) Warning: broken link 'javascript'"])

suite "local file inclusion":
test "cannot include files in sandboxed mode":
Expand Down