Skip to content

Commit

Permalink
highlite: fix nim-lang#17890 - tokenize Nim escape seq-s
Browse files Browse the repository at this point in the history
  • Loading branch information
a-mr committed May 1, 2021
1 parent 13b5752 commit 0c833af
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 19 deletions.
52 changes: 33 additions & 19 deletions lib/packages/docutils/highlite.nim
Original file line number Diff line number Diff line change
Expand Up @@ -190,31 +190,33 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
var pos = g.pos
g.start = g.pos
if g.state == gtStringLit:
g.kind = gtStringLit
while true:
if g.buf[pos] == '\\':
g.kind = gtEscapeSequence
inc(pos)
case g.buf[pos]
of '\\':
g.kind = gtEscapeSequence
of 'x', 'X':
inc(pos)
if g.buf[pos] in hexChars: inc(pos)
if g.buf[pos] in hexChars: inc(pos)
of '0'..'9':
while g.buf[pos] in {'0'..'9'}: inc(pos)
of '\0':
g.state = gtNone
else: inc(pos)
else:
g.kind = gtStringLit
while true:
case g.buf[pos]
of 'x', 'X':
of '\\':
break
of '\0', '\r', '\n':
g.state = gtNone
break
of '\"':
inc(pos)
if g.buf[pos] in hexChars: inc(pos)
if g.buf[pos] in hexChars: inc(pos)
of '0'..'9':
while g.buf[pos] in {'0'..'9'}: inc(pos)
of '\0':
g.state = gtNone
break
else: inc(pos)
break
of '\0', '\r', '\n':
g.state = gtNone
break
of '\"':
inc(pos)
g.state = gtNone
break
else: inc(pos)
else:
case g.buf[pos]
of ' ', '\t'..'\r':
Expand Down Expand Up @@ -985,6 +987,18 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
of langPython: pythonNextToken(g)
of langCmd: cmdNextToken(g)

proc tokenize*(text: string, lang: SourceLanguage): seq[(string, TokenClass)] =
  ## Splits `text` into consecutive `(substring, kind)` pairs by repeatedly
  ## calling `getNextToken` for `lang`.
  ##
  ## Each substring runs from the position reached after the previous token
  ## up to (but excluding) the tokenizer position after the current one.
  ## Scanning stops at the first `gtEof` token, which is not added to the
  ## result.
  var tokenizer: GeneralTokenizer
  initGeneralTokenizer(tokenizer, text)
  var sliceStart = 0
  getNextToken(tokenizer, lang)
  while tokenizer.kind != gtEof:
    # `tokenizer.pos` is read after the call, so it marks the end of the
    # token that was just produced.
    result.add((text[sliceStart ..< tokenizer.pos], tokenizer.kind))
    sliceStart = tokenizer.pos
    getNextToken(tokenizer, lang)

when isMainModule:
var keywords: seq[string]
# Try to work running in both the subdir or at the root.
Expand Down
13 changes: 13 additions & 0 deletions tests/stdlib/thighlite.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

import unittest
import ../../lib/packages/docutils/highlite

suite "Nim tokenizing":
  test "string literals and escape seq":
    # An ordinary string literal is expected to be split at every escape
    # sequence, with the quotes staying attached to the neighbouring
    # string-literal pieces.
    let plainTokens = "\"ok1\\nok2\\nok3\"".tokenize(langNim)
    check(plainTokens == @[
      ("\"ok1", gtStringLit),
      ("\\n", gtEscapeSequence),
      ("ok2", gtStringLit),
      ("\\n", gtEscapeSequence),
      ("ok3\"", gtStringLit)
    ])

    # A triple-quoted literal is expected to come back as one single
    # long-string token, backslashes included.
    let longTokens = "\"\"\"ok1\\nok2\\nok3\"\"\"".tokenize(langNim)
    check(longTokens == @[
      ("\"\"\"ok1\\nok2\\nok3\"\"\"", gtLongStringLit)
    ])

0 comments on commit 0c833af

Please sign in to comment.