From 0c833af604c79a294406ecf2e6dd5ebadf82e443 Mon Sep 17 00:00:00 2001
From: Andrey Makarov
Date: Sun, 2 May 2021 00:53:22 +0300
Subject: [PATCH] highlite: fix #17890 - tokenize Nim escape seq-s

---
Reviewer notes (this section is ignored by `git am`):
  * nimNextToken: while in the gtStringLit state, a backslash now yields its
    own gtEscapeSequence token; otherwise the string run is scanned up to the
    next backslash (state kept), the closing quote, or NUL/CR/LF (state reset
    to gtNone).
  * tokenize: new exported helper that runs getNextToken until gtEof and
    returns (text, TokenClass) pairs sliced from the input.
  * tests/stdlib/thighlite.nim: new test; a regular string literal is split
    around `\n`, while a triple-quoted literal stays one gtLongStringLit token.

 lib/packages/docutils/highlite.nim | 52 +++++++++++++++++++-----------
 tests/stdlib/thighlite.nim         | 13 ++++++++
 2 files changed, 46 insertions(+), 19 deletions(-)
 create mode 100644 tests/stdlib/thighlite.nim

diff --git a/lib/packages/docutils/highlite.nim b/lib/packages/docutils/highlite.nim
index c0f4c97602eb0..d6ce274dd30c6 100644
--- a/lib/packages/docutils/highlite.nim
+++ b/lib/packages/docutils/highlite.nim
@@ -190,31 +190,33 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
   var pos = g.pos
   g.start = g.pos
   if g.state == gtStringLit:
-    g.kind = gtStringLit
-    while true:
+    if g.buf[pos] == '\\':
+      g.kind = gtEscapeSequence
+      inc(pos)
       case g.buf[pos]
-      of '\\':
-        g.kind = gtEscapeSequence
+      of 'x', 'X':
         inc(pos)
+        if g.buf[pos] in hexChars: inc(pos)
+        if g.buf[pos] in hexChars: inc(pos)
+      of '0'..'9':
+        while g.buf[pos] in {'0'..'9'}: inc(pos)
+      of '\0':
+        g.state = gtNone
+      else: inc(pos)
+    else:
+      g.kind = gtStringLit
+      while true:
         case g.buf[pos]
-        of 'x', 'X':
+        of '\\':
+          break
+        of '\0', '\r', '\n':
+          g.state = gtNone
+          break
+        of '\"':
           inc(pos)
-          if g.buf[pos] in hexChars: inc(pos)
-          if g.buf[pos] in hexChars: inc(pos)
-        of '0'..'9':
-          while g.buf[pos] in {'0'..'9'}: inc(pos)
-        of '\0':
           g.state = gtNone
+          break
         else: inc(pos)
-        break
-      of '\0', '\r', '\n':
-        g.state = gtNone
-        break
-      of '\"':
-        inc(pos)
-        g.state = gtNone
-        break
-      else: inc(pos)
   else:
     case g.buf[pos]
     of ' ', '\t'..'\r':
@@ -985,6 +987,18 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
   of langPython: pythonNextToken(g)
   of langCmd: cmdNextToken(g)
 
+proc tokenize*(text: string, lang: SourceLanguage): seq[(string, TokenClass)] =
+  var g: GeneralTokenizer
+  initGeneralTokenizer(g, text)
+  var prevPos = 0
+  while true:
+    getNextToken(g, lang)
+    if g.kind == gtEof:
+      break
+    var s = text[prevPos ..< g.pos]
+    result.add (s, g.kind)
+    prevPos = g.pos
+
 when isMainModule:
   var keywords: seq[string]
   # Try to work running in both the subdir or at the root.
diff --git a/tests/stdlib/thighlite.nim b/tests/stdlib/thighlite.nim
new file mode 100644
index 0000000000000..18bfb41a1c798
--- /dev/null
+++ b/tests/stdlib/thighlite.nim
@@ -0,0 +1,13 @@
+
+import unittest
+import ../../lib/packages/docutils/highlite
+
+suite "Nim tokenizing":
+  test "string literals and escape seq":
+    check("\"ok1\\nok2\\nok3\"".tokenize(langNim) ==
+       @[("\"ok1", gtStringLit), ("\\n", gtEscapeSequence), ("ok2", gtStringLit),
+         ("\\n", gtEscapeSequence), ("ok3\"", gtStringLit)
+      ])
+    check("\"\"\"ok1\\nok2\\nok3\"\"\"".tokenize(langNim) ==
+       @[("\"\"\"ok1\\nok2\\nok3\"\"\"", gtLongStringLit)
+      ])