Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docgen: sort symbols (fix #17910) #18560

Merged
merged 5 commits into from
Jul 25, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 38 additions & 14 deletions compiler/docgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# by knowing how the anchors are going to be named.

import
ast, strutils, strtabs, options, msgs, os, idents,
ast, strutils, strtabs, algorithm, sequtils, options, msgs, os, idents,
wordrecg, syntaxes, renderer, lexer,
packages/docutils/rst, packages/docutils/rstgen,
json, xmltree, trees, types,
Expand Down Expand Up @@ -43,11 +43,15 @@ type
descRst: ItemPre ## Description of the item (may contain
## runnableExamples).
substitutions: seq[string] ## Variable names in `doc.item`...
sortName: string ## The string used for sorting in output
ModSection = object ## Section like Procs, Types, etc.
secItems: seq[Item] ## Pre-processed items.
finalMarkup: string ## The items, after RST pass 2 and rendering.
ModSections = array[TSymKind, ModSection]
TSections = array[TSymKind, string]
TocItem = object ## HTML TOC item
content: string
sortName: string
TocSectionsFinal = array[TSymKind, string]
ExampleGroup = ref object
## a group of runnableExamples with same rdoccmd
rdoccmd: string ## from 1st arg in `runnableExamples(rdoccmd): body`
Expand All @@ -64,8 +68,13 @@ type
module: PSym
modDeprecationMsg: string
section: ModSections # entries of ``.nim`` file (for `proc`s, etc)
toc, toc2: TSections # toc2 - grouped TOC
tocTable: array[TSymKind, Table[string, string]]
tocSimple: array[TSymKind, seq[TocItem]]
# TOC entries for non-overloadable symbols (e.g. types, constants)...
tocTable: array[TSymKind, Table[string, seq[TocItem]]]
# ...otherwise (e.g. procs)
toc2: TocSectionsFinal # TOC `content`, which is probably wrapped
# in `doc.section.toc2`
toc: TocSectionsFinal # final TOC (wrapped in `doc.section.toc`)
indexValFilename: string
analytics: string # Google Analytics javascript, "" if doesn't exist
seenSymbols: StringTableRef # avoids duplicate symbol generation for HTML.
Expand Down Expand Up @@ -874,7 +883,10 @@ proc genItem(d: PDoc, n, nameNode: PNode, k: TSymKind, docFlags: DocFlags) =

let seeSrc = genSeeSrc(d, toFullPath(d.conf, n.info), n.info.line.int)

d.section[k].secItems.add Item(descRst: comm, substitutions: @[
d.section[k].secItems.add Item(
descRst: comm,
sortName: plainNameEsc,
substitutions: @[
"name", name, "uniqueName", uniqueName,
"header", result, "itemID", $d.id,
"header_plain", plainNameEsc, "itemSym", cleanPlainSymbol,
Expand All @@ -898,12 +910,15 @@ proc genItem(d: PDoc, n, nameNode: PNode, k: TSymKind, docFlags: DocFlags) =
setIndexTerm(d[], external, symbolOrId, plain, nameNode.sym.name.s & '.' & plain,
xmltree.escape(getPlainDocstring(e).docstringSummary))

d.toc[k].add(getConfigVar(d.conf, "doc.item.toc") % [
d.tocSimple[k].add TocItem(
sortName: plainNameEsc,
content: getConfigVar(d.conf, "doc.item.toc") % [
"name", name, "header_plain", plainNameEsc,
"itemSymOrIDEnc", symbolOrIdEnc])

d.tocTable[k].mgetOrPut(cleanPlainSymbol, "").add(
getConfigVar(d.conf, "doc.item.tocTable") % [
d.tocTable[k].mgetOrPut(cleanPlainSymbol, newSeq[TocItem]()).add TocItem(
sortName: plainNameEsc,
content: getConfigVar(d.conf, "doc.item.tocTable") % [
"name", name, "header_plain", plainNameEsc,
"itemSymOrID", symbolOrId.replace(",", ",<wbr>"),
"itemSymOrIDEnc", symbolOrIdEnc])
Expand Down Expand Up @@ -1143,8 +1158,9 @@ proc finishGenerateDoc*(d: var PDoc) =
var resolved = resolveSubs(d.sharedState, f.rst)
renderRstToOut(d[], resolved, result)
of false: result &= f.str
proc cmp(x, y: Item): int = cmpIgnoreCase(x.sortName, y.sortName)
for k in TSymKind:
for item in d.section[k].secItems:
for item in d.section[k].secItems.sorted(cmp):
var itemDesc: string
renderItemPre(d, item.descRst, itemDesc)
d.section[k].finalMarkup.add(
Expand Down Expand Up @@ -1262,18 +1278,26 @@ proc genSection(d: PDoc, kind: TSymKind, groupedToc = false) =
"sectionid", $ord(kind), "sectionTitle", title,
"sectionTitleID", $(ord(kind) + 50), "content", d.section[kind].finalMarkup]

var tocSource = d.toc
proc cmp(x, y: TocItem): int = cmpIgnoreCase(x.sortName, y.sortName)
if groupedToc:
for p in d.tocTable[kind].keys:
let overloadableNames = toSeq(keys(d.tocTable[kind]))
for plainName in overloadableNames.sorted():
var overloadChoices = d.tocTable[kind][plainName]
overloadChoices.sort(cmp)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks quite slow, can we do something about it?

Copy link
Member

@timotheecour timotheecour Jul 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no it does not look quite slow, calling N sorts where N = nb overloaded symbols will be dwarfed by other costs (parsing, semcheck, etc); although this could be optimized, there is no point in premature optimization at expense of simplicity.

and indeed:
nim doc --project --doccmd:skip lib/pure/times.nim takes 1.5 seconds (excluding runnableExamples)
the cost for this block of the whole block in if groupedToc: is 0.0035 (lumping other code too, this is an upperbound), ie 1/445 of total cost for a total of 1936 overloads. There are many optimizations worth considering before considering this one.

(0.0035069999999999824, 0.0000020000000000575113, 1936, 1, "D20210723T093627.total")
diff --git a/compiler/docgen.nim b/compiler/docgen.nim
index 842f95ce7..dde9ba0bb 100644
--- a/compiler/docgen.nim
+++ b/compiler/docgen.nim
@@ -1266,7 +1266,7 @@ proc generateTags*(d: PDoc, n: PNode, r: var string) =
     if not checkForFalse(n[0][0]):
       generateTags(d, lastSon(n[0]), r)
   else: discard
-
+import std/times except getDateStr, getClockStr
 proc genSection(d: PDoc, kind: TSymKind, groupedToc = false) =
   const sectionNames: array[skModule..skField, string] = [
     "Imports", "Types", "Vars", "Lets", "Consts", "Vars", "Procs", "Funcs",
@@ -1280,10 +1280,16 @@ proc genSection(d: PDoc, kind: TSymKind, groupedToc = false) =

   proc cmp(x, y: TocItem): int = cmpIgnoreCase(x.sortName, y.sortName)
   if groupedToc:
+    var dt0 {.global.}: typeof(cpuTime() - cpuTime())
+    var tot0 {.global.}: int
+    let t = cpuTime()
+    var tot = 0
     let overloadableNames = toSeq(keys(d.tocTable[kind]))
     for plainName in overloadableNames.sorted():
       var overloadChoices = d.tocTable[kind][plainName]
+      # echo "D20210723T093748: ", $overloadChoices.len, " " , plainName
       overloadChoices.sort(cmp)
+      tot += overloadChoices.len
       var content: string
       for item in overloadChoices:
         content.add item.content
@@ -1291,6 +1297,11 @@ proc genSection(d: PDoc, kind: TSymKind, groupedToc = false) =
           "sectionid", $ord(kind), "sectionTitle", title,
           "sectionTitleID", $(ord(kind) + 50),
           "content", content, "plainName", plainName]
+    let t2 = cpuTime()
+    let dt = t2 - t
+    dt0 += dt
+    tot0 += tot
+    echo (dt0, dt, tot0, tot, "D20210723T093627.total")
   else:
     for item in d.tocSimple[kind].sorted(cmp):
       d.toc2[kind].add item.content

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are many optimizations worth considering before considering this one.

I hoped somebody would be prove me wrong by the numbers, however:

  1. Code is easier to optimize when it's still fresh in the developer's head.
  2. Things add up. Today's compiler is significantly slower than a couple of years ago. And the things that made it slower are hard to optimize. At the same time, I still get no help whatsoever in the one architectural change (= IC) that would imply we can stop caring about performance in most of the compiler's code.

Copy link
Member

@timotheecour timotheecour Jul 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with 1., but I'm skeptical of 2.; big-O notation and Amdahl's law is what matters in the end and if the compiler is slower today than years ago, it's more likely to be caused by a small number of bottlenecks (that should be identified and fixed) than to a multitude of small things adding up (eg better error messages, static analysis etc).

On the same vein, optimizing PNode (eg removing comment node) will have a bigger impact on memory/performance than optimizing PSym since much more PNode's are allocated.

As for IC, I'm still convinced this is the wrong approach, and cling shows that IC can be be done with a compiler as a service (CAS) approach even for a language like C++. Even if nim's current IC approach would end up being faster than current nim full re-compilation (which isn't the case today), the overheads involved would still make CAS much faster.

Not only that, but you'd also get a REPL for free in the process, unlike with IC.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"compiler as a service (CAS) approach" is inherently non-scalable wrt memory consumption. It's also completely not required, C# and Delphi (Delphi in 1990 btw) have proven beyond doubt that "save to disk" is fast enough for everything, REPL included. Plus IC forces us to eliminate phase-ordering bugs which plague the Nim compiler.

var content: string
for item in overloadChoices:
content.add item.content
d.toc2[kind].add getConfigVar(d.conf, "doc.section.toc2") % [
"sectionid", $ord(kind), "sectionTitle", title,
"sectionTitleID", $(ord(kind) + 50),
"content", d.tocTable[kind][p], "plainName", p]
tocSource = d.toc2
"content", content, "plainName", plainName]
else:
for item in d.tocSimple[kind].sorted(cmp):
d.toc2[kind].add item.content

d.toc[kind] = getConfigVar(d.conf, "doc.section.toc") % [
"sectionid", $ord(kind), "sectionTitle", title,
"sectionTitleID", $(ord(kind) + 50), "content", tocSource[kind]]
"sectionTitleID", $(ord(kind) + 50), "content", d.toc2[kind]]

proc relLink(outDir: AbsoluteDir, destFile: AbsoluteFile, linkto: RelativeFile): string =
$relativeTo(outDir / linkto, destFile.splitFile().dir, '/')
Expand Down
10 changes: 5 additions & 5 deletions nimdoc/testproject/expected/subdir/subdir_b/utils.html
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,16 @@ <h1 class="title">subdir/subdir_b/utils</h1>
<li><a class="reference" href="#aEnum.t"
title="aEnum(): untyped">aEnum(): untyped</a></li>

</ul>
<ul class="simple nested-toc-section">fromUtilsGen
<li><a class="reference" href="#fromUtilsGen.t"
title="fromUtilsGen(): untyped">fromUtilsGen(): untyped</a></li>

</ul>
<ul class="simple nested-toc-section">bEnum
<li><a class="reference" href="#bEnum.t"
title="bEnum(): untyped">bEnum(): untyped</a></li>

</ul>
<ul class="simple nested-toc-section">fromUtilsGen
<li><a class="reference" href="#fromUtilsGen.t"
title="fromUtilsGen(): untyped">fromUtilsGen(): untyped</a></li>

</ul>

</ul>
Expand Down
Loading