doc/rst2html: some few fixes for enumerated and bullet lists (nim-lan…

…g#16295) * fix bullet/enumarated lists with many blank lines * fix enumerated list parsing * fix parse failure when next line after list empty * implement arbitrary start of enumerator * check that enumerators are in order * remove redundant start=x if x=1 or a * add some doc on implemented features * update start in rst_examples.rst * allow upper-case letters + more docs
mildred · Jan 11, 2021 · 1a8bbb9 · 1a8bbb9
1 parent ae53cb8
commit 1a8bbb9
Show file tree

Hide file tree

Showing 6 changed files with 308 additions and 36 deletions.
diff --git a/config/nimdoc.tex.cfg b/config/nimdoc.tex.cfg
@@ -50,6 +50,7 @@ doc.file = """
 \usepackage{fancyvrb, courier}
 \usepackage{tabularx}
 \usepackage{hyperref}
+\usepackage{enumitem}
 
 \begin{document}
 \title{$title $version}

diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
@@ -11,6 +11,59 @@
 ## subset is implemented. Some features of the `markdown`:idx: wiki syntax are
 ## also supported.
 ##
+## Supported RST features:
+##
+## * body elements
+##   + sections
+##   + transitions
+##   + paragraphs
+##   + bullet lists using \+, \*, \-
+##   + enumerated lists using arabic numerals or alphabet
+##     characters:  1. ... 2. ... *or* a. ... b. ... *or* A. ... B. ...
+##   + definition lists
+##   + field lists
+##   + option lists
+##   + indented literal blocks
+##   + simple tables
+##   + directives
+##     - image, figure
+##     - code-block
+##     - substitution definitions: replace and image
+##     - ... a few more
+##   + comments
+## * inline markup
+##   + *emphasis*, **strong emphasis**, `interpreted text`,
+##     ``inline literals``, hyperlink references, substitution references,
+##     standalone hyperlinks
+##
+## Additional features:
+##
+## * ***triple emphasis*** (bold and italic) using \*\*\*
+##
+## Optional additional features, turned on by ``options: RstParseOption`` in
+## `rstParse proc <#rstParse,string,string,int,int,bool,RstParseOptions,FindFileHandler,MsgHandler>`_:
+##
+## * emoji / smiley symbols
+## * markdown tables
+## * markdown code blocks
+## * markdown links
+## * markdown headlines
+##
+## Limitations:
+##
+## * no Unicode support in character width calculations
+## * body elements
+##   - no roman numerals in enumerated lists
+##   - no quoted literal blocks
+##   - no doctest blocks
+##   - no grid tables
+##   - directives: no support for admonitions (notes, caution)
+##   - no footnotes & citations support
+##   - no inline internal targets
+## * inline markup
+##   - no simple-inline-markup
+##   - no embedded URI and aliases
+##
 ## **Note:** Import ``packages/docutils/rst`` to use this module
 
 import
@@ -569,7 +622,9 @@ proc match(p: RstParser, start: int, expr: string): bool =
   # 'p'              tkPunct
   # 'T'              always true
   # 'E'              whitespace, indent or eof
-  # 'e'              tkWord or '#' (for enumeration lists)
+  # 'e'              any enumeration sequence or '#' (for enumeration lists)
+  # 'x'              a..z or '#' (for enumeration lists)
+  # 'n'              0..9 or '#' (for enumeration lists)
   var i = 0
   var j = start
   var last = expr.len - 1
@@ -583,12 +638,16 @@ proc match(p: RstParser, start: int, expr: string): bool =
     of 'o': result = p.tok[j].kind == tkOther
     of 'T': result = true
     of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent}
-    of 'e':
+    of 'e', 'x', 'n':
       result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#"
       if result:
         case p.tok[j].symbol[0]
-        of 'a'..'z', 'A'..'Z', '#': result = p.tok[j].symbol.len == 1
-        of '0'..'9': result = allCharsInSet(p.tok[j].symbol, {'0'..'9'})
+        of '#': result = true
+        of 'a'..'z', 'A'..'Z':
+          result = expr[i] in {'e', 'x'} and p.tok[j].symbol.len == 1
+        of '0'..'9':
+          result = expr[i] in {'e', 'n'} and
+                     allCharsInSet(p.tok[j].symbol, {'0'..'9'})
         else: result = false
     else:
       var c = expr[i]
@@ -1465,33 +1524,55 @@ proc parseDefinitionList(p: var RstParser): PRstNode =
 
 proc parseEnumList(p: var RstParser): PRstNode =
   const
-    wildcards: array[0..2, string] = ["(e) ", "e) ", "e. "]
-    wildpos: array[0..2, int] = [1, 0, 0]
-  result = nil
+    wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
+                                      "(x) ", "x) ", "x. "]
+      # enumerator patterns, where 'x' means letter and 'n' means number
+    wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3]  # number of tokens
+    wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
+      # position of enumeration sequence (number/letter) in enumerator
+  result = newRstNode(rnEnumList)
+  let col = currentTok(p).col
   var w = 0
-  while w <= 2:
+  while w < wildcards.len:
     if match(p, p.idx, wildcards[w]): break
     inc w
-  if w <= 2:
-    var col = currentTok(p).col
-    result = newRstNode(rnEnumList)
-    inc p.idx, wildpos[w] + 3
-    var j = tokenAfterNewline(p)
-    if p.tok[j].col == currentTok(p).col or match(p, j, wildcards[w]):
-      pushInd(p, currentTok(p).col)
-      while true:
-        var item = newRstNode(rnEnumItem)
-        parseSection(p, item)
-        result.add(item)
-        if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
-            match(p, p.idx + 1, wildcards[w]):
-          inc p.idx, wildpos[w] + 4
-        else:
+  assert w < wildcards.len
+  for i in 0 ..< wildToken[w]-1:  # add first enumerator with (, ), and .
+    if p.tok[p.idx + i].symbol == "#":
+      result.text.add "1"
+    else:
+      result.text.add p.tok[p.idx + i].symbol
+  var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
+  inc p.idx, wildToken[w]
+  while true:
+    var item = newRstNode(rnEnumItem)
+    pushInd(p, currentTok(p).col)
+    parseSection(p, item)
+    popInd(p)
+    result.add(item)
+    if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
+        match(p, p.idx+1, wildcards[w]):
+      let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
+      # check that it's in sequence: enumerator == next(prevEnum)
+      if "n" in wildcards[w]:  # arabic numeral
+        let prevEnumI = try: parseInt(prevEnum) except: 1
+        let curEnum =
+          if enumerator == "#": prevEnumI + 1
+          else: (try: parseInt(enumerator) except: 1)
+        if curEnum - prevEnumI != 1:
           break
-      popInd(p)
+        prevEnum = enumerator
+      else:  # a..z
+        let prevEnumI = ord(prevEnum[0])
+        let curEnum =
+          if enumerator == "#": prevEnumI + 1
+          else: ord(enumerator[0])
+        if curEnum - prevEnumI != 1:
+          break
+        prevEnum = $chr(curEnum)
+      inc p.idx, 1 + wildToken[w]
     else:
-      dec p.idx, wildpos[w] + 3
-      result = nil
+      break
 
 proc sonKind(father: PRstNode, i: int): RstNodeKind =
   result = rnLeaf
@@ -1511,6 +1592,8 @@ proc parseSection(p: var RstParser, result: PRstNode) =
         result.add(a)
         popInd(p)
       else:
+        while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
+          inc p.idx  # skip blank lines
         leave = true
         break
     if leave or currentTok(p).kind == tkEof: break

diff --git a/lib/packages/docutils/rstast.nim b/lib/packages/docutils/rstast.nim
@@ -69,7 +69,7 @@ type
   RstNode* {.acyclic, final.} = object ## an RST node's description
     kind*: RstNodeKind       ## the node's kind
     text*: string             ## valid for leafs in the AST; and the title of
-                              ## the document or the section
+                              ## the document or the section; and rnEnumList
     level*: int               ## valid for some node kinds
     sons*: RstNodeSeq        ## the node's sons
 

diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
@@ -1029,6 +1029,56 @@ proc renderField(d: PDoc, n: PRstNode, result: var string) =
   if not b:
     renderAux(d, n, "<tr>$1</tr>\n", "$1", result)
 
+proc renderEnumList(d: PDoc, n: PRstNode, result: var string) =
+  var
+    specifier = ""
+    specStart = ""
+    i1 = 0
+    pre = ""
+    i2 = n.text.len-1
+    post = ""
+  if n.text[0] == '(':
+    i1 = 1
+    pre = "("
+  if n.text[^1] == ')' or n.text[^1] == '.':
+    i2 = n.text.len-2
+    post = $n.text[^1]
+  let enumR = i1 .. i2  # enumerator range without surrounding (, ), .
+  if d.target == outLatex:
+    result.add ("\n%"&n.text&"\n")
+    # use enumerate parameters from package enumitem
+    if n.text[i1].isDigit:
+      var labelDef = ""
+      if pre != "" or post != "":
+        labelDef = "label=" & pre & "\\arabic*" & post & ","
+      if n.text[enumR] != "1":
+        specStart = "start=$1" % [n.text[enumR]]
+      if labelDef != "" or specStart != "":
+        specifier = "[$1$2]" % [labelDef, specStart]
+    else:
+      let (first, labelDef) =
+        if n.text[i1].isUpperAscii: ('A', "label=" & pre & "\\Alph*" & post)
+        else: ('a', "label=" & pre & "\\alph*" & post)
+      if n.text[i1] != first:
+        specStart = ",start=" & $(ord(n.text[i1]) - ord(first) + 1)
+      specifier = "[$1$2]" % [labelDef, specStart]
+  else:  # HTML
+    # TODO: implement enumerator formatting using pre and post ( and ) for HTML
+    if n.text[i1].isDigit:
+      if n.text[enumR] != "1":
+        specStart = " start=\"$1\"" % [n.text[enumR]]
+      specifier = "class=\"simple\"" & specStart
+    else:
+      let (first, labelDef) =
+        if n.text[i1].isUpperAscii: ('A', "class=\"upperalpha simple\"")
+        else: ('a', "class=\"loweralpha simple\"")
+      if n.text[i1] != first:
+        specStart = " start=\"$1\"" % [ $(ord(n.text[i1]) - ord(first) + 1) ]
+      specifier = labelDef & specStart
+  renderAux(d, n, "<ol " & specifier & ">$1</ol>\n",
+            "\\begin{enumerate}" & specifier & "$1\\end{enumerate}\n",
+            result)
+
 proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
   if n == nil: return
   case n.kind
@@ -1042,9 +1092,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
                     "\\begin{itemize}$1\\end{itemize}\n", result)
   of rnBulletItem, rnEnumItem:
     renderAux(d, n, "<li>$1</li>\n", "\\item $1\n", result)
-  of rnEnumList:
-    renderAux(d, n, "<ol class=\"simple\">$1</ol>\n",
-                    "\\begin{enumerate}$1\\end{enumerate}\n", result)
+  of rnEnumList: renderEnumList(d, n, result)
   of rnDefList:
     renderAux(d, n, "<dl class=\"docutils\">$1</dl>\n",
                        "\\begin{description}$1\\end{description}\n", result)

diff --git a/nimdoc/rst2html/expected/rst_examples.html b/nimdoc/rst2html/expected/rst_examples.html
@@ -274,15 +274,17 @@ <h3><a class="toc-backref" id="parameter-constraints-the-starstar-operator" href
 <li>An input parameter should not be aliased with a global or thread local variable updated by the called proc.</li>
 </ol>
 <p>One problem with rules 3 and 4 is that they affect specific global or thread local variables, but Nim's effect tracking only tracks &quot;uses no global variable&quot; via <tt class="docutils literal"><span class="pre">.noSideEffect</span></tt>. The rules 3 and 4 can also be approximated by a different rule:</p>
-<ol class="simple"><li>A global or thread local variable (or a location derived from such a location) can only passed to a parameter of a <tt class="docutils literal"><span class="pre">.noSideEffect</span></tt> proc.</li>
+<ol class="simple" start="5"><li>A global or thread local variable (or a location derived from such a location) can only passed to a parameter of a <tt class="docutils literal"><span class="pre">.noSideEffect</span></tt> proc.</li>
 </ol>
 <p>These two procs are the two modus operandi of the real-time garbage collector:</p>
-<p>(1) GC_SetMaxPause Mode</p>
-<blockquote><p>You can call <tt class="docutils literal"><span class="pre">GC_SetMaxPause</span></tt> at program startup and then each triggered garbage collector run tries to not take longer than <tt class="docutils literal"><span class="pre">maxPause</span></tt> time. However, it is possible (and common) that the work is nevertheless not evenly distributed as each call to <tt class="docutils literal"><span class="pre">new</span></tt> can trigger the garbage collector and thus take  <tt class="docutils literal"><span class="pre">maxPause</span></tt> time.</p></blockquote>
-<p>(2) GC_step Mode</p>
-<blockquote><p><p>This allows the garbage collector to perform some work for up to <tt class="docutils literal"><span class="pre">us</span></tt> time. This is useful to call in the main loop to ensure the garbage collector can do its work. To bind all garbage collector activity to a <tt class="docutils literal"><span class="pre">GC_step</span></tt> call, deactivate the garbage collector with <tt class="docutils literal"><span class="pre">GC_disable</span></tt> at program startup. If <tt class="docutils literal"><span class="pre">strongAdvice</span></tt> is set to <tt class="docutils literal"><span class="pre">true</span></tt>, then the garbage collector will be forced to perform the collection cycle. Otherwise, the garbage collector may decide not to do anything, if there is not much garbage to collect. You may also specify the current stack size via <tt class="docutils literal"><span class="pre">stackSize</span></tt> parameter. It can improve performance when you know that there are no unique Nim references below a certain point on the stack. Make sure the size you specify is greater than the potential worst-case size.</p>
+<ol class="simple"><li><p>GC_SetMaxPause Mode</p>
+<p>You can call <tt class="docutils literal"><span class="pre">GC_SetMaxPause</span></tt> at program startup and then each triggered garbage collector run tries to not take longer than <tt class="docutils literal"><span class="pre">maxPause</span></tt> time. However, it is possible (and common) that the work is nevertheless not evenly distributed as each call to <tt class="docutils literal"><span class="pre">new</span></tt> can trigger the garbage collector and thus take  <tt class="docutils literal"><span class="pre">maxPause</span></tt> time.</p>
+</li>
+<li><p>GC_step Mode</p>
+<p>This allows the garbage collector to perform some work for up to <tt class="docutils literal"><span class="pre">us</span></tt> time. This is useful to call in the main loop to ensure the garbage collector can do its work. To bind all garbage collector activity to a <tt class="docutils literal"><span class="pre">GC_step</span></tt> call, deactivate the garbage collector with <tt class="docutils literal"><span class="pre">GC_disable</span></tt> at program startup. If <tt class="docutils literal"><span class="pre">strongAdvice</span></tt> is set to <tt class="docutils literal"><span class="pre">true</span></tt>, then the garbage collector will be forced to perform the collection cycle. Otherwise, the garbage collector may decide not to do anything, if there is not much garbage to collect. You may also specify the current stack size via <tt class="docutils literal"><span class="pre">stackSize</span></tt> parameter. It can improve performance when you know that there are no unique Nim references below a certain point on the stack. Make sure the size you specify is greater than the potential worst-case size.</p>
 <p>It can improve performance when you know that there are no unique Nim references below a certain point on the stack. Make sure the size you specify is greater than the potential worst-case size.</p>
-</p></blockquote>
+</li>
+</ol>
 <p>These procs provide a &quot;best effort&quot; real-time guarantee; in particular the cycle collector is not aware of deadlines. Deactivate it to get more predictable real-time behaviour. Tests show that a 1ms max pause time will be met in almost all cases on modern CPUs (with the cycle collector disabled).</p>
 
 <h2><a class="toc-backref" id="code-reordering-time-measurement-with-garbage-collectors" href="#code-reordering-time-measurement-with-garbage-collectors">Time measurement with garbage collectors</a></h2><p>The garbage collectors' way of measuring time uses (see <tt class="docutils literal"><span class="pre">lib/system/timers.nim</span></tt> for the implementation):</p>