From da8faf4cd1bfafd2ce987ef3126ff1680b368d12 Mon Sep 17 00:00:00 2001 From: Joe Chen Date: Thu, 9 Feb 2023 17:32:18 +0800 Subject: [PATCH] refactor: migrate to goldmark (#89) --- .gitignore | 1 + check.go | 40 ++- funcs.go | 6 +- go.mod | 21 +- go.sum | 54 ++-- handler.go | 2 +- internal/search/excerpt.go | 10 +- internal/search/excerpt_test.go | 4 +- internal/search/index/index.go | 2 +- internal/search/index/search.go | 3 +- internal/search/query/query.go | 8 +- internal/search/search.go | 7 +- internal/search/sections.go | 50 ++-- internal/search/sections_test.go | 12 +- markdown/code_style_renderer.go | 28 -- markdown/extender.go | 354 +++++++++++++++++++++++++ markdown/headings.go | 70 +++-- markdown/markdown.go | 435 ++++++------------------------- markdown/markdown_test.go | 198 +++++++------- markdown/sgquery_lexer.go | 104 -------- markdown/tree.go | 60 +++-- markdown/tree_test.go | 17 +- search.go | 32 ++- site.go | 9 +- template.go | 3 +- 25 files changed, 752 insertions(+), 778 deletions(-) delete mode 100644 markdown/code_style_renderer.go create mode 100644 markdown/extender.go delete mode 100644 markdown/sgquery_lexer.go diff --git a/.gitignore b/.gitignore index 849ddff..4fc275f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ dist/ +.idea diff --git a/check.go b/check.go index 4948aac..4a68d7b 100644 --- a/check.go +++ b/check.go @@ -11,10 +11,12 @@ import ( "strings" "sync" - "github.com/russross/blackfriday/v2" - "github.com/sourcegraph/docsite/markdown" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" "golang.org/x/net/html" "golang.org/x/net/html/atom" + + "github.com/sourcegraph/docsite/markdown" ) // Check checks the site content for common problems (such as broken links). @@ -84,13 +86,23 @@ type contentPageCheckData struct { func (s *Site) checkContentPage(page *contentPageCheckData) (problems []string) { // Find invalid links. - ast := markdown.NewParser(markdown.NewBfRenderer()).Parse(page.Data) - ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - if entering && (node.Type == blackfriday.Link || node.Type == blackfriday.Image) { - u, err := url.Parse(string(node.LinkData.Destination)) + doc := markdown.New(markdown.Options{}).Parser().Parse(text.NewReader(page.Data)) + err := ast.Walk(doc, func(node ast.Node, entering bool) (ast.WalkStatus, error) { + if entering && (node.Kind() == ast.KindLink || node.Kind() == ast.KindImage) { + var dest string + switch n := node.(type) { + case *ast.Link: + dest = string(n.Destination) + case *ast.Image: + dest = string(n.Destination) + default: + panic("unreachable") + } + + u, err := url.Parse(dest) if err != nil { - problems = append(problems, fmt.Sprintf("invalid URL %q", node.LinkData.Destination)) - return blackfriday.GoToNext + problems = append(problems, fmt.Sprintf("invalid URL %q", dest)) + return ast.WalkContinue, nil } isPathOnly := u.Scheme == "" && u.Host == "" @@ -98,10 +110,10 @@ func (s *Site) checkContentPage(page *contentPageCheckData) (problems []string) // Reject absolute paths because they will break when browsing the docs on // GitHub/Sourcegraph in the repository, or if the root path ever changes. if isPathOnly && strings.HasPrefix(u.Path, "/") { - problems = append(problems, fmt.Sprintf("must use relative, not absolute, link to %s", node.LinkData.Destination)) + problems = append(problems, fmt.Sprintf("must use relative, not absolute, link to %s", dest)) } - if node.Type == blackfriday.Link { + if node.Kind() == ast.KindLink { // Require that relative paths link to the actual .md file, i.e not the "foo" folder in the case of // of "foo/index.md", so that browsing docs on the file system works. if isPathOnly && u.Path != "" && filepath.Ext(u.Path) == "" { @@ -109,9 +121,11 @@ func (s *Site) checkContentPage(page *contentPageCheckData) (problems []string) } } } - - return blackfriday.GoToNext + return ast.WalkContinue, nil }) + if err != nil { + problems = append(problems, fmt.Sprintf("find invalid links: %v", err)) + } // Find broken links. handler := s.Handler() @@ -132,7 +146,7 @@ func (s *Site) checkContentPage(page *contentPageCheckData) (problems []string) return } handler.ServeHTTP(rr, req) - if rr.Code != http.StatusOK { + if rr.Code != http.StatusOK && rr.Code != http.StatusMovedPermanently { problems = append(problems, fmt.Sprintf("broken link to %s", urlStr)) } }, diff --git a/funcs.go b/funcs.go index d234f17..3565846 100644 --- a/funcs.go +++ b/funcs.go @@ -11,9 +11,10 @@ import ( "github.com/mozillazg/go-slugify" "github.com/pkg/errors" - "github.com/sourcegraph/docsite/markdown" "github.com/sourcegraph/go-jsonschema/jsonschema" "github.com/sourcegraph/jsonschemadoc" + + "github.com/sourcegraph/docsite/markdown" ) // createMarkdownFuncs creates the standard set of Markdown functions expected by documentation @@ -81,6 +82,7 @@ func createMarkdownFuncs(site *Site) markdown.FuncMap { {{.Title}}
+ {{.Schema}}
` @@ -99,7 +101,7 @@ func createMarkdownFuncs(site *Site) markdown.FuncMap { return "", err } - doc, err := markdown.Run(ctx, []byte(output.String()), markdown.Options{}) + doc, err := markdown.Run(output.Bytes(), markdown.Options{}) if err != nil { return "", err } diff --git a/go.mod b/go.mod index 0cebd4a..dc9dab7 100644 --- a/go.mod +++ b/go.mod @@ -1,24 +1,25 @@ module github.com/sourcegraph/docsite require ( - github.com/Depado/bfchroma v1.2.0 - github.com/alecthomas/chroma v0.8.1 - github.com/alecthomas/colour v0.1.0 // indirect - github.com/dlclark/regexp2 v1.2.1 // indirect + github.com/alecthomas/chroma v0.10.0 github.com/mozillazg/go-slugify v0.2.0 - github.com/mozillazg/go-unidecode v0.1.1 // indirect github.com/pkg/errors v0.9.1 github.com/russross/blackfriday/v2 v2.0.1 github.com/shurcooL/sanitized_anchor_name v1.0.0 github.com/sourcegraph/go-jsonschema v0.0.0-20191016093751-6a4f2b621f5d github.com/sourcegraph/jsonschemadoc v0.0.0-20190214000648-1850b818f08c - github.com/stretchr/testify v1.4.0 // indirect + github.com/yuin/goldmark v1.5.4 + github.com/yuin/goldmark-highlighting v0.0.0-20220208100518-594be1970594 golang.org/x/net v0.0.0-20191119073136-fc4aabc6c914 golang.org/x/tools v0.0.0-20191122071640-df8e87c2cec0 - gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect - gopkg.in/yaml.v2 v2.2.7 + gopkg.in/yaml.v2 v2.3.0 ) -require github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect +require ( + github.com/dlclark/regexp2 v1.4.0 // indirect + github.com/mozillazg/go-unidecode v0.1.1 // indirect + gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) -go 1.17 +go 1.18 diff --git a/go.sum b/go.sum index 34bbe2c..0dc581a 100644 --- a/go.sum +++ b/go.sum @@ -1,35 +1,15 @@ -github.com/Depado/bfchroma v1.2.0 h1:NyYPFVhWvq8S2ts6Ok4kwXVE3TEO5fof+9ZOKbBJQUo= -github.com/Depado/bfchroma v1.2.0/go.mod h1:U3RJUYwWVJrZRaJQyfS+wuxBApSTR/BC37PhAI+Ydps= -github.com/alecthomas/assert v0.0.0-20170929043011-405dbfeb8e38 h1:smF2tmSOzy2Mm+0dGI2AIUHY+w0BUc+4tn40djz7+6U= -github.com/alecthomas/assert v0.0.0-20170929043011-405dbfeb8e38/go.mod h1:r7bzyVFMNntcxPZXK3/+KdruV1H5KSlyVY0gc+NgInI= -github.com/alecthomas/chroma v0.6.0/go.mod h1:MmozekIi2rfQSzDcdEZ2BoJ9Pxs/7uc2Y4Boh+hIeZo= -github.com/alecthomas/chroma v0.8.1 h1:ym20sbvyC6RXz45u4qDglcgr8E313oPROshcuCHqiEE= -github.com/alecthomas/chroma v0.8.1/go.mod h1:sko8vR34/90zvl5QdcUdvzL3J8NKjAUx9va9jPuFNoM= -github.com/alecthomas/colour v0.0.0-20160524082231-60882d9e2721/go.mod h1:QO9JBoKquHd+jz9nshCh40fOfO+JzsoXy8qTHF68zU0= -github.com/alecthomas/colour v0.1.0 h1:nOE9rJm6dsZ66RGWYSFrXw461ZIt9A6+nHgL7FRrDUk= -github.com/alecthomas/colour v0.1.0/go.mod h1:QO9JBoKquHd+jz9nshCh40fOfO+JzsoXy8qTHF68zU0= -github.com/alecthomas/kong v0.2.4/go.mod h1:kQOmtJgV+Lb4aj+I2LEn40cbtawdWJ9Y8QLq+lElKxE= -github.com/alecthomas/repr v0.0.0-20180818092828-117648cd9897/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ= -github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1 h1:GDQdwm/gAcJcLAKQQZGOJ4knlw+7rfEQQcmwTbt4p5E= -github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ= -github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 h1:y5HC9v93H5EPKqaS1UYVg1uYah5Xf51mBfIoWehClUQ= -github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964/go.mod h1:Xd9hchkHSWYkEqJwUGisez3G1QY8Ryz0sdWrLPMGjLk= +github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek= +github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dlclark/regexp2 v1.1.6/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= -github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= -github.com/dlclark/regexp2 v1.2.1 h1:Ff/S0snjr1oZHUNOkvA/gP6KUaMg5vDDl3Qnhjnwgm8= -github.com/dlclark/regexp2 v1.2.1/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= +github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E= +github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= -github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mozillazg/go-slugify v0.2.0 h1:SIhqDlnJWZH8OdiTmQgeXR28AOnypmAXPeOTcG7b9lk= github.com/mozillazg/go-slugify v0.2.0/go.mod h1:z7dPH74PZf2ZPFkyxx+zjPD8CNzRJNa1CGacv0gg8Ns= github.com/mozillazg/go-unidecode v0.1.1 h1:uiRy1s4TUqLbcROUrnCN/V85Jlli2AmDF6EeAXOeMHE= @@ -41,8 +21,6 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ= -github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sourcegraph/go-jsonschema v0.0.0-20190205151546-7939fa138765/go.mod h1:6DfNy4BLIggAeittTJ8o9z/6d1ly+YujBTSnv03i7Bk= @@ -51,21 +29,19 @@ github.com/sourcegraph/go-jsonschema v0.0.0-20191016093751-6a4f2b621f5d/go.mod h github.com/sourcegraph/jsonschemadoc v0.0.0-20190214000648-1850b818f08c h1:MXlcJZ1VL5nNGkCj6ZTT71P4pImPkeG2lvzcJYzGvU4= github.com/sourcegraph/jsonschemadoc v0.0.0-20190214000648-1850b818f08c/go.mod h1:ovHiFoMDwf4nf7ynAc7lIhD4w0nc/6tO27DtVzqYrTQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/yuin/goldmark v1.4.5/go.mod h1:rmuwmfZ0+bvzB24eSC//bk1R1Zp3hM0OXYv/G2LIilg= +github.com/yuin/goldmark v1.5.4 h1:2uY/xC0roWy8IBEGLgB1ywIoEJFGmRrX21YQcvGZzjU= +github.com/yuin/goldmark v1.5.4/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yuin/goldmark-highlighting v0.0.0-20220208100518-594be1970594 h1:yHfZyN55+5dp1wG7wDKv8HQ044moxkyGq12KFFMFDxg= +github.com/yuin/goldmark-highlighting v0.0.0-20220208100518-594be1970594/go.mod h1:U9ihbh+1ZN7fR5Se3daSPoz1CGF9IYtSvWwVQtnzGHU= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191119073136-fc4aabc6c914 h1:MlY3mEfbnWGmUi4rtHOtNnnnN4UJRGSyLPx+DXA5Sq4= golang.org/x/net v0.0.0-20191119073136-fc4aabc6c914/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200413165638-669c56c373c4 h1:opSr2sbRXk5X5/givKrrKj9HXxFpW2sdCiP8MJSKLQY= -golang.org/x/sys v0.0.0-20200413165638-669c56c373c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20191122071640-df8e87c2cec0 h1:CWlTyMUD9qhx663mgsnpfHQPG6sI9uwY4aWgJvojriU= golang.org/x/tools v0.0.0-20191122071640-df8e87c2cec0/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -73,6 +49,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.7 h1:VUgggvou5XRW9mHwD/yXxIYSMtY0zoKQf/v226p2nyo= -gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/handler.go b/handler.go index b120324..61165d4 100644 --- a/handler.go +++ b/handler.go @@ -155,7 +155,7 @@ func (s *Site) Handler() http.Handler { } // Content page found. - data.Content, err = s.newContentPage(r.Context(), filePath, fileData, contentVersion) + data.Content, err = s.newContentPage(filePath, fileData, contentVersion) } if err != nil { // Content page not found. diff --git a/internal/search/excerpt.go b/internal/search/excerpt.go index 749f807..218d957 100644 --- a/internal/search/excerpt.go +++ b/internal/search/excerpt.go @@ -1,10 +1,10 @@ package search import ( - "strings" + "bytes" ) -func excerpt(text string, start, end, maxChars int) string { +func excerpt(text []byte, start, end, maxChars int) []byte { origStart := start origEnd := end @@ -20,7 +20,7 @@ func excerpt(text string, start, end, maxChars int) string { const breakChars = ".\n" - if index := strings.IndexAny(text[start:origStart], breakChars); index != -1 { + if index := bytes.IndexAny(text[start:origStart], breakChars); index != -1 { start += index + 1 end += index if end > len(text) { @@ -28,12 +28,12 @@ func excerpt(text string, start, end, maxChars int) string { } } - if index := strings.LastIndexAny(text[origEnd:end], breakChars); index != -1 { + if index := bytes.LastIndexAny(text[origEnd:end], breakChars); index != -1 { end = origEnd + index + 1 if end > len(text) { end = len(text) } } - return strings.TrimSpace(text[start:end]) + return bytes.TrimSpace(text[start:end]) } diff --git a/internal/search/excerpt_test.go b/internal/search/excerpt_test.go index 11c1780..2006b0e 100644 --- a/internal/search/excerpt_test.go +++ b/internal/search/excerpt_test.go @@ -30,8 +30,8 @@ func TestExcerpt(t *testing.T) { } for name, test := range tests { t.Run(name, func(t *testing.T) { - got := excerpt(test.text, test.start, test.end, test.maxChars) - if got != test.want { + got := excerpt([]byte(test.text), test.start, test.end, test.maxChars) + if string(got) != test.want { t.Errorf("got %q, want %q", got, test.want) } }) diff --git a/internal/search/index/index.go b/internal/search/index/index.go index ab19125..6557014 100644 --- a/internal/search/index/index.go +++ b/internal/search/index/index.go @@ -12,7 +12,7 @@ type Document struct { ID DocID // the document ID Title string // the document title URL string // the document URL - Data string // the text content + Data []byte // the text content } // Index is a search index. diff --git a/internal/search/index/search.go b/internal/search/index/search.go index 3ffd3d2..77c8f95 100644 --- a/internal/search/index/search.go +++ b/internal/search/index/search.go @@ -1,7 +1,6 @@ package index import ( - "context" "sort" "github.com/sourcegraph/docsite/internal/search/query" @@ -20,7 +19,7 @@ type DocumentResult struct { } // Search performs a search against the index. -func (i *Index) Search(ctx context.Context, query query.Query) (*Result, error) { +func (i *Index) Search(query query.Query) (*Result, error) { var documentResults []DocumentResult for _, doc := range i.index { if query.Match(doc.URL, doc.Data) { diff --git a/internal/search/query/query.go b/internal/search/query/query.go index d86c8c1..6c3c24d 100644 --- a/internal/search/query/query.go +++ b/internal/search/query/query.go @@ -34,14 +34,14 @@ func Parse(queryStr string) Query { } // Match reports whether the path or text contains at least 1 match of the query. -func (q Query) Match(pathStr, text string) bool { +func (q Query) Match(pathStr string, text []byte) bool { name := path.Base(pathStr) for _, token := range q.tokens { if token.pattern.MatchString(name) { return true } - if token.pattern.MatchString(text) { + if token.pattern.Match(text) { return true } } @@ -51,7 +51,7 @@ func (q Query) Match(pathStr, text string) bool { const maxMatchesPerDoc = 50 // Score scores the query match against the path and text. -func (q Query) Score(pathStr, text string) float64 { +func (q Query) Score(pathStr string, text []byte) float64 { name := path.Base(pathStr) tokensInName := 0 @@ -61,7 +61,7 @@ func (q Query) Score(pathStr, text string) float64 { if token.pattern.MatchString(name) { tokensInName++ } - count := len(token.pattern.FindAllStringIndex(text, maxMatchesPerDoc)) + count := len(token.pattern.FindAllIndex(text, maxMatchesPerDoc)) if count > 0 { tokensMatching++ } diff --git a/internal/search/search.go b/internal/search/search.go index a8094da..037c0d9 100644 --- a/internal/search/search.go +++ b/internal/search/search.go @@ -1,9 +1,8 @@ package search import ( - "context" - "github.com/pkg/errors" + "github.com/sourcegraph/docsite/internal/search/index" "github.com/sourcegraph/docsite/internal/search/query" ) @@ -20,8 +19,8 @@ type DocumentResult struct { SectionResults []SectionResult } -func Search(ctx context.Context, query query.Query, index *index.Index) (*Result, error) { - result0, err := index.Search(ctx, query) +func Search(query query.Query, index *index.Index) (*Result, error) { + result0, err := index.Search(query) if err != nil { return nil, err } diff --git a/internal/search/sections.go b/internal/search/sections.go index 4f80306..2230f60 100644 --- a/internal/search/sections.go +++ b/internal/search/sections.go @@ -1,7 +1,11 @@ package search import ( - "github.com/russross/blackfriday/v2" + gohtml "html" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/sourcegraph/docsite/internal/search/query" "github.com/sourcegraph/docsite/markdown" ) @@ -14,7 +18,7 @@ type SectionResult struct { Excerpts []string // the match excerpt } -func documentSectionResults(data string, query query.Query) ([]SectionResult, error) { +func documentSectionResults(source []byte, query query.Query) ([]SectionResult, error) { type stackEntry struct { id string title string @@ -22,8 +26,7 @@ func documentSectionResults(data string, query query.Query) ([]SectionResult, er } stack := []stackEntry{{}} cur := func() stackEntry { return stack[len(stack)-1] } - ast := markdown.NewParser(markdown.NewBfRenderer()).Parse([]byte(data)) - markdown.SetHeadingIDs(ast) + root := markdown.New(markdown.Options{}).Parser().Parse(text.NewReader(source)) var results []SectionResult addResult := func(excerpts []string) { @@ -53,36 +56,47 @@ func documentSectionResults(data string, query query.Query) ([]SectionResult, er }) } - ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - if entering && node.Type == blackfriday.Heading { - for node.Level <= cur().level { + err := ast.Walk(root, func(node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + + if node.Kind() == ast.KindHeading { + n := node.(*ast.Heading) + for n.Level <= cur().level { stack = stack[:len(stack)-1] } // For the document top title heading, use the empty ID. var id string if !markdown.IsDocumentTopTitleHeadingNode(node) { - id = node.HeadingID + id = markdown.GetAttributeID(n) } stack = append(stack, stackEntry{ id: id, - title: string(markdown.RenderText(node)), - level: node.Level, + title: string(n.Text(source)), + level: n.Level, }) } - if entering && (node.Type == blackfriday.Paragraph || node.Type == blackfriday.Item || node.Type == blackfriday.Heading || node.Type == blackfriday.BlockQuote || node.Type == blackfriday.Code) { - text := string(markdown.RenderText(node)) - if matches := query.FindAllIndex(text); len(matches) > 0 { + if entering && + (node.Kind() == ast.KindParagraph || + node.Kind() == ast.KindListItem || + node.Kind() == ast.KindHeading || + node.Kind() == ast.KindBlockquote || + node.Kind() == ast.KindCodeBlock || + node.Kind() == ast.KindFencedCodeBlock) { + text := node.Text(source) + if matches := query.FindAllIndex(string(text)); len(matches) > 0 { // Don't include excerpts for heading because all of the heading is considered the // match. var excerpts []string - if node.Type != blackfriday.Heading { + if node.Kind() != ast.KindHeading { excerpts = make([]string, len(matches)) for i, match := range matches { const excerptMaxLength = 220 - excerpts[i] = excerpt(text, match[0], match[1], excerptMaxLength) + excerpts[i] = gohtml.UnescapeString(string(excerpt(text, match[0], match[1], excerptMaxLength))) } } @@ -97,11 +111,11 @@ func documentSectionResults(data string, query query.Query) ([]SectionResult, er addResult(excerpts) - return blackfriday.SkipChildren + return ast.WalkSkipChildren, nil } } - return blackfriday.GoToNext + return ast.WalkContinue, nil }) - return results, nil + return results, err } diff --git a/internal/search/sections_test.go b/internal/search/sections_test.go index 6c18931..ae4600d 100644 --- a/internal/search/sections_test.go +++ b/internal/search/sections_test.go @@ -29,11 +29,11 @@ aa zz bb zz`, wantQueryResults: map[string][]string{ - "a": []string{"#"}, - "aa": []string{"#"}, - "b": []string{"#b"}, - "bb": []string{"#b"}, - "zz": []string{"#", "#b"}, + "a": {"#"}, + "aa": {"#"}, + "b": {"#b"}, + "bb": {"#b"}, + "zz": {"#", "#b"}, }, }, } @@ -41,7 +41,7 @@ bb zz`, t.Run(name, func(t *testing.T) { for queryStr, wantResults := range test.wantQueryResults { t.Run(queryStr, func(t *testing.T) { - results, err := documentSectionResults(test.data, query.Parse(queryStr)) + results, err := documentSectionResults([]byte(test.data), query.Parse(queryStr)) if err != nil { t.Fatal(err) } diff --git a/markdown/code_style_renderer.go b/markdown/code_style_renderer.go deleted file mode 100644 index bc2ae06..0000000 --- a/markdown/code_style_renderer.go +++ /dev/null @@ -1,28 +0,0 @@ -package markdown - -import ( - "fmt" - "io" - - "github.com/russross/blackfriday/v2" -) - -type CodeStyleRenderer struct { - blackfriday.Renderer -} - -func (c CodeStyleRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - if node.Type == blackfriday.CodeBlock { - lang := string(node.Info) - if lang == "" { - lang = "plaintext" - } - - fmt.Fprintf(w, `
`, lang)
-		walkStatus := c.Renderer.RenderNode(w, node, entering)
-		fmt.Fprint(w, "
") - return walkStatus - } - - return c.Renderer.RenderNode(w, node, entering) -} diff --git a/markdown/extender.go b/markdown/extender.go new file mode 100644 index 0000000..4a13d7c --- /dev/null +++ b/markdown/extender.go @@ -0,0 +1,354 @@ +package markdown + +import ( + "bytes" + "context" + "fmt" + "html" + "net/url" + "regexp" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/renderer" + goldmarkhtml "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/util" +) + +var _ goldmark.Extender = (*extender)(nil) + +type extender struct { + Options +} + +func (e *extender) Extend(m goldmark.Markdown) { + m.Renderer().AddOptions( + renderer.WithNodeRenderers( + util.Prioritized(&nodeRenderer{e.Options}, 10), + ), + ) +} + +var _ renderer.NodeRenderer = (*nodeRenderer)(nil) + +type nodeRenderer struct { + Options +} + +func (r *nodeRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(ast.KindHeading, func(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.Heading) + if !entering { + _, _ = w.WriteString("\n") + return ast.WalkContinue, nil + } + + _, _ = w.WriteString("') + + // Add "#" anchor links to headers to make it easy for users to discover and copy links + // to sections of a document. + attrID := GetAttributeID(n) + + // If heading consists only of a link, do not emit an anchor link. + if hasSingleChildOfLink(n) { + _, _ = fmt.Fprintf(w, ``, attrID) + } else { + _, _ = fmt.Fprintf(w, ``, attrID) + } + return ast.WalkContinue, nil + }) + reg.Register(ast.KindHTMLBlock, func(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.HTMLBlock) + if !entering { + if n.HasClosure() { + val := n.ClosureLine.Value(source) + // For unknown reason, goldmark would write closure for HTML comment twice. + if !bytes.Contains(val, []byte("-->")) { + _, _ = w.Write(val) + } + } + return ast.WalkContinue, nil + } + + var val []byte + for i := 0; i < n.Lines().Len(); i++ { + s := n.Lines().At(i) + val = append(val, s.Value(source)...) + } + + if entering { + // Rewrite URLs correctly when they are relative to the document, regardless of whether it's + // an index.md document or not. + if r.Options.Base != nil { + if v, err := rewriteRelativeURLsInHTML(val, r.Options); err == nil { + val = v + } + } + + // Evaluate Markdown funcs (
nodes), using a heuristic to + // skip blocks that don't contain any invocations. + if v, err := EvalMarkdownFuncs(context.Background(), val, r.Options); err == nil { + val = v + } else { + return ast.WalkStop, err + } + + _, _ = w.Write(val) + } else if n.HasClosure() { + _, _ = w.Write(n.ClosureLine.Value(source)) + } + return ast.WalkContinue, nil + }) + reg.Register(ast.KindRawHTML, func(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkSkipChildren, nil + } + + n := node.(*ast.RawHTML) + + var val []byte + l := n.Segments.Len() + for i := 0; i < l; i++ { + segment := n.Segments.At(i) + val = append(val, segment.Value(source)...) + } + + // Rewrite URLs correctly when they are relative to the document, regardless of whether it's + // an index.md document or not. + if r.Options.Base != nil { + if v, err := rewriteRelativeURLsInHTML(val, r.Options); err == nil { + val = v + } + } + _, _ = w.Write(val) + return ast.WalkSkipChildren, nil + }) + reg.Register(ast.KindBlockquote, func(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*ast.Blockquote) + paragraph := n.FirstChild() + var val []byte + for i := 0; i < paragraph.Lines().Len(); i++ { + s := paragraph.Lines().At(i) + val = append(val, s.Value(source)...) + } + + parseAside := func(literal []byte) string { + switch { + case bytes.HasPrefix(literal, []byte("NOTE:")): + return "note" + case bytes.HasPrefix(literal, []byte("WARNING:")): + return "warning" + default: + return "" + } + } + aside := parseAside(val) + if aside != "" { + if entering { + _, _ = w.WriteString(fmt.Sprintf("\n") + } + } else { + if entering { + _, _ = w.WriteString("
\n") + } else { + _, _ = w.WriteString("
\n") + } + } + return ast.WalkContinue, nil + }) + + var anchorDirectivePattern = regexp.MustCompile(`\{#[\w.-]+\}`) + reg.Register(ast.KindText, func(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + + n := node.(*ast.Text) + text := n.Text(source) + + // Rewrites `{#foo}` directives in text to `` anchors. + matches := anchorDirectivePattern.FindAllIndex(text, -1) + if len(matches) > 0 { + i := 0 + for _, match := range matches { + start, end := match[0], match[1] + if i != start { + _, _ = w.Write(text[i:start]) + } + + escapedID := html.EscapeString(string(text[start+2 : end-1])) + _, _ = w.WriteString(fmt.Sprintf(``, escapedID)) + i = end + } + if i != len(text) { + _, _ = w.Write(text[i:]) + } + return ast.WalkContinue, nil + } + + // Marks up strings that look like dates as `