Skip to content
This repository has been archived by the owner on Jan 11, 2023. It is now read-only.

Commit

Permalink
Sanitize HTML output, remove SVG support
Browse files Browse the repository at this point in the history
Sanitizing SVG is difficult and requires extra attention. An SVG sanitizer library could be used in the future.
  • Loading branch information
samuelmeuli committed Mar 21, 2020
1 parent b3e8afd commit 061c5c7
Show file tree
Hide file tree
Showing 9 changed files with 363 additions and 128 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Comes with built-in Markdown rendering and syntax highlighting
- Implemented in Go for speed and portability
- Sample styles support light and dark mode
- Output is sanitized to prevent code injection

## Example

Expand Down
106 changes: 32 additions & 74 deletions convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,89 +2,52 @@
package nbtohtml

import (
"bytes"
"fmt"
"html"
"html/template"
"io"
"io/ioutil"
"path/filepath"
"strings"

"github.com/alecthomas/chroma"
htmlFormatter "github.com/alecthomas/chroma/formatters/html"
"github.com/alecthomas/chroma/lexers"
"github.com/alecthomas/chroma/styles"
"github.com/buildkite/terminal-to-html"
"gopkg.in/russross/blackfriday.v2"
)

// 3rd party renderers

// highlightCode renders source as syntax-highlighted HTML with the Chroma library and writes the
// result to writer. The markup carries HTML classes rather than inline styles, so the consumer
// decides how the highlighted code looks.
//
// The lexer requested through the lexer argument is tried first; if Chroma does not return one,
// the source itself is analysed, and as a last resort Chroma's fallback lexer is used. An error
// from tokenising or formatting is returned unchanged.
func highlightCode(writer io.Writer, source string, lexer string) error {
	chosen := lexers.Get(lexer)
	if chosen == nil {
		if chosen = lexers.Analyse(source); chosen == nil {
			chosen = lexers.Fallback
		}
	}
	chosen = chroma.Coalesce(chosen)

	// Classes instead of inline styles
	classFormatter := htmlFormatter.New(htmlFormatter.WithClasses(true))

	tokens, tokeniseErr := chosen.Tokenise(nil, source)
	if tokeniseErr != nil {
		return tokeniseErr
	}
	return classFormatter.Format(writer, styles.GitHub, tokens)
}

// renderMarkdown concatenates the given Markdown lines (no separator is inserted between them) and
// returns the HTML that the Blackfriday library produces for the combined text.
func renderMarkdown(markdownLines []string) string {
	source := []byte(strings.Join(markdownLines, ""))
	rendered := blackfriday.Run(source)
	return string(rendered)
}

// Output renderers

// convertDataOutput converts data output (e.g. a base64-encoded plot image) to HTML.
func convertDataOutput(output output) template.HTML {
htmlString := ""
var outputHTML template.HTML = ""

switch {
case output.Data.TextHTML != nil:
htmlString = strings.Join(output.Data.TextHTML, "")
htmlString := strings.Join(output.Data.TextHTML, "")
outputHTML = sanitizeHTML(htmlString)
case output.Data.ApplicationPDF != nil:
// TODO: Implement PDF conversion
fmt.Printf("missing conversion logic for `application/pdf` data type\n")
htmlString = "<pre>PDF output</pre>"
outputHTML = "<pre>PDF output</pre>"
case output.Data.TextLaTeX != nil:
// TODO: Implement LaTeX conversion
fmt.Printf("missing conversion logic for `text/latex` data type\n")
htmlString = "<pre>LaTeX output</pre>"
outputHTML = "<pre>LaTeX output</pre>"
case output.Data.ImageSVGXML != nil:
htmlString = strings.Join(output.Data.ImageSVGXML, "")
// TODO: Implement SVG conversion
fmt.Printf("missing conversion logic for `image/svg+xml` data type\n")
outputHTML = "<pre>SVG output</pre>"
case output.Data.ImagePNG != nil:
htmlString = fmt.Sprintf(`<img src="data:image/png;base64,%s">`, *output.Data.ImagePNG)
htmlString := fmt.Sprintf(`<img src="data:image/png;base64,%s">`, *output.Data.ImagePNG)
outputHTML = sanitizeHTML(htmlString)
case output.Data.ImageJPEG != nil:
htmlString = fmt.Sprintf(`<img src="data:image/jpeg;base64,%s">`, *output.Data.ImageJPEG)
htmlString := fmt.Sprintf(`<img src="data:image/jpeg;base64,%s">`, *output.Data.ImageJPEG)
outputHTML = sanitizeHTML(htmlString)
case output.Data.TextMarkdown != nil:
htmlString = renderMarkdown(output.Data.TextMarkdown)
outputHTML = renderMarkdown(output.Data.TextMarkdown)
case output.Data.TextPlain != nil:
htmlString = fmt.Sprintf(
`<pre>%s</pre>`,
html.EscapeString(strings.Join(output.Data.TextPlain, "")),
)
escapedHTML := escapeHTML(strings.Join(output.Data.TextPlain, ""))
outputHTML = "<pre>" + escapedHTML + "</pre>"
default:
fmt.Printf("missing `execute_result` data type in output of type `%s`\n", output.OutputType)
}

return template.HTML(htmlString)
return outputHTML
}

// convertErrorOutput converts error output (e.g. generated by a Python exception) to HTML.
Expand All @@ -95,13 +58,7 @@ func convertErrorOutput(output output) template.HTML {
}

// Convert ANSI colors to HTML
linesHTML := []string{}
for _, tracebackLine := range output.Traceback {
lineHTML := terminal.Render([]byte(tracebackLine))
linesHTML = append(linesHTML, string(lineHTML))
}
htmlString := fmt.Sprintf(`<pre>%s</pre>`, strings.Join(linesHTML, "\n"))
return template.HTML(htmlString)
return renderTerminalOutput(output.Traceback)
}

// convertStreamOutput converts stream output (e.g. stdout written by a Python program) to HTML.
Expand All @@ -111,36 +68,36 @@ func convertStreamOutput(output output) template.HTML {
return ""
}

htmlString := fmt.Sprintf(`<pre>%s</pre>`, strings.Join(output.Text, ""))
return template.HTML(htmlString)
escapedHTML := escapeHTML(strings.Join(output.Text, ""))
return "<pre>" + escapedHTML + "</pre>"
}

// Cell renderers

// convertMarkdownCell converts a Markdown cell to HTML.
func convertMarkdownCell(cell cell) template.HTML {
return template.HTML(renderMarkdown(cell.Source))
return renderMarkdown(cell.Source)
}

// convertCodeCell converts a code cell to HTML with classes for syntax highlighting.
//
// The cell's source lines are joined without a separator and handed to renderSourceCode together
// with fileExtension. If rendering fails, the failure is reported on stdout and the source is
// returned HTML-escaped inside a plain <pre> element, so cell contents are never emitted as raw
// markup.
func convertCodeCell(cell cell, fileExtension string) template.HTML {
	sourceString := strings.Join(cell.Source, "")

	cellHTML, err := renderSourceCode(sourceString, fileExtension)
	if err != nil {
		// Render code without syntax highlighting if an error occurred. err is an error value, so
		// it is printed with %v; the integer verb %d would produce a garbled message.
		fmt.Printf("skipping syntax highlighting: %v\n", err)
		return "<pre>" + escapeHTML(sourceString) + "</pre>"
	}

	return cellHTML
}

// convertRawCell returns a simple HTML element for the raw notebook cell.
func convertRawCell(cell cell) template.HTML {
htmlString := fmt.Sprintf(
`<pre>%s</pre>`,
html.EscapeString(strings.Join(cell.Source, "")),
)
return template.HTML(htmlString)
escapedHTML := escapeHTML(strings.Join(cell.Source, ""))
return "<pre>" + escapedHTML + "</pre>"
}

// Input/output renderers
Expand All @@ -150,7 +107,8 @@ func convertPrompt(executionCount *int) template.HTML {
if executionCount == nil {
return ""
}
return template.HTML(fmt.Sprintf("[%d]:", *executionCount))
// Execution count is an integer, so HTML should be safe from code injection
return template.HTML(fmt.Sprintf("[%d]:", *executionCount)) // nolint:gosec
}

// convertOutput converts the provided cell input to HTML.
Expand Down
121 changes: 91 additions & 30 deletions convert_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// nolint:gosec
package nbtohtml

import (
Expand All @@ -17,74 +18,102 @@ text
assert.Equal(t, expected, actual)
}

func TestConvertStreamOutputCodeInjection(t *testing.T) {
expected := template.HTML(`<pre>multiline
stream
text
&lt;script&gt;window.alert(&#39;I&#39;m evil!&#39;);&lt;/script&gt;
</pre>`)
actual := convertStreamOutput(testStreamOutputCodeInjection)
assert.Equal(t, expected, actual)
}

func TestConvertStreamOutputMissingKey(t *testing.T) {
expected := template.HTML("")
actual := convertStreamOutput(output{OutputType: "stream"})
actual := convertStreamOutput(testStreamOutputMissingKey)
assert.Equal(t, expected, actual)
}

func TestConvertDataOutputHTML(t *testing.T) {
expected := template.HTML(`<div>
<p>Hello world</p>
</div>`)
actual := convertDataOutput(testHTMLOutput)
assert.Equal(t, expected, actual)
}

func TestConvertDataHTMLOutput(t *testing.T) {
func TestConvertDataOutputHTMLCodeInjection(t *testing.T) {
expected := template.HTML(`<div>
<p>Hello world</p>
</div>`)
actual := convertDataOutput(testDisplayDataHTMLOutput)
actual := convertDataOutput(testHTMLOutputCodeInjection)
assert.Equal(t, expected, actual)
}

func TestConvertDataPDFOutput(t *testing.T) {
func TestConvertDataOutputPDF(t *testing.T) {
expected := template.HTML("<pre>PDF output</pre>")
actual := convertDataOutput(testDisplayDataPDFOutput)
actual := convertDataOutput(testPDFOutput)
assert.Equal(t, expected, actual)
}

func TestConvertDataLaTeXOutput(t *testing.T) {
func TestConvertDataOutputLaTeX(t *testing.T) {
expected := template.HTML("<pre>LaTeX output</pre>")
actual := convertDataOutput(testDisplayDataLaTeXOutput)
actual := convertDataOutput(testLaTeXOutput)
assert.Equal(t, expected, actual)
}

func TestConvertDataSVGOutput(t *testing.T) {
expected := template.HTML(
`<svg id="star" xmlns="http://www.w3.org/2000/svg" width="255" height="240" viewBox="0 0 51 48">
<path d="M25 1l6 17h18L35 29l5 17-15-10-15 10 5-17L1 18h18z"/>
</svg>`,
)
actual := convertDataOutput(testDisplayDataSVGOutput)
func TestConvertDataOutputSVG(t *testing.T) {
expected := template.HTML("<pre>SVG output</pre>")
actual := convertDataOutput(testSVGOutput)
assert.Equal(t, expected, actual)
}

func TestConvertDataPNGOutput(t *testing.T) {
expected := template.HTML(fmt.Sprintf(
`<img src="data:image/png;base64,%s">`,
*testDisplayDataPNGOutput.Data.ImagePNG,
))
actual := convertDataOutput(testDisplayDataPNGOutput)
func TestConvertDataOutputPNG(t *testing.T) {
expected := template.HTML(fmt.Sprintf(`<img src="data:image/png;base64,%s">`, testPNGString))
actual := convertDataOutput(testPNGOutput)
assert.Equal(t, expected, actual)
}

func TestConvertDataJPEGOutput(t *testing.T) {
expected := template.HTML(fmt.Sprintf(
`<img src="data:image/jpeg;base64,%s">`,
*testDisplayDataJPEGOutput.Data.ImageJPEG,
))
actual := convertDataOutput(testDisplayDataJPEGOutput)
func TestConvertDataOutputJPEG(t *testing.T) {
expected := template.HTML(fmt.Sprintf(`<img src="data:image/jpeg;base64,%s">`, testJPEGString))
actual := convertDataOutput(testJPEGOutput)
assert.Equal(t, expected, actual)
}

func TestConvertDataMarkdownOutput(t *testing.T) {
func TestConvertDataOutputMarkdown(t *testing.T) {
expected := template.HTML(`<h1>Hello World</h1>
<p>This is <strong>bold</strong> and <em>italic</em></p>
`)
actual := convertDataOutput(testDisplayDataMarkdownOutput)
actual := convertDataOutput(testMarkdownOutput)
assert.Equal(t, expected, actual)
}

func TestConvertDataPlainTextOutput(t *testing.T) {
func TestConvertDataOutputMarkdownCodeInjection(t *testing.T) {
expected := template.HTML(`<h1>Hello World</h1>
<p>This is <strong>bold</strong> and <em>italic</em>
</p>
`)
actual := convertDataOutput(testMarkdownOutputCodeInjection)
assert.Equal(t, expected, actual)
}

func TestConvertDataOutputPlainText(t *testing.T) {
expected := template.HTML(`<pre>multiline
text
data</pre>`)
actual := convertDataOutput(testDisplayDataPlainTextOutput)
actual := convertDataOutput(testPlainTextOutput)
assert.Equal(t, expected, actual)
}

func TestConvertDataOutputPlainTextCodeInjection(t *testing.T) {
expected := template.HTML(`<pre>multiline
text
data
&lt;script&gt;window.alert(&#39;I&#39;m evil!&#39;);&lt;/script&gt;
</pre>`)
actual := convertDataOutput(testPlainTextOutputCodeInjection)
assert.Equal(t, expected, actual)
}

Expand All @@ -101,6 +130,14 @@ With <span class="term-fg31">ANSI colors</span></pre>`)
assert.Equal(t, expected, actual)
}

func TestConvertErrorOutputCodeInjection(t *testing.T) {
expected := template.HTML(`<pre>Error message
With <span class="term-fg31">ANSI colors</span>
&lt;script&gt;window.alert(&#39;I&#39;m evil!&#39;);&lt;&#47;script&gt;</pre>`)
actual := convertErrorOutput(testErrorOutputCodeInjection)
assert.Equal(t, expected, actual)
}

func TestConvertErrorOutputMissingKey(t *testing.T) {
expected := template.HTML("<pre>An unknown error occurred</pre>")
actual := convertErrorOutput(output{OutputType: "error"})
Expand All @@ -116,15 +153,39 @@ func TestConvertMarkdownCell(t *testing.T) {
assert.Equal(t, expected, actual)
}

func TestConvertMarkdownCellCodeInjection(t *testing.T) {
expected := template.HTML(`<h1>Hello World</h1>
<p>This is <strong>bold</strong> and <em>italic</em>
</p>
`)
actual := convertMarkdownCell(testMarkdownCellCodeInjection)
assert.Equal(t, expected, actual)
}

// TestConvertCodeCell converts the testCodeCell fixture with the "py" file extension and checks
// that the result matches a pattern for a Chroma <pre> block containing both print statements.
func TestConvertCodeCell(t *testing.T) {
	pattern := template.HTML(`(?s)<pre class="chroma">.*print.*Hello.*print.*World.*</pre>`)
	rendered := convertCodeCell(testCodeCell, "py")

	assert.Regexp(t, pattern, rendered)
}

// TestConvertCodeCellCodeInjection converts the testCodeCellCodeInjection fixture with the "py"
// file extension. It checks that the result is a Chroma <pre> block containing both print
// statements and that no raw <script> tag is present in the rendered HTML.
func TestConvertCodeCellCodeInjection(t *testing.T) {
	expected := template.HTML(`(?s)<pre class="chroma">.*print.*Hello.*print.*World.*</pre>`)
	actual := convertCodeCell(testCodeCellCodeInjection, "py")
	assert.Regexp(t, expected, actual)

	// The pattern above is satisfied by any text between the matched words, so it cannot detect
	// an unescaped payload on its own; assert explicitly that no raw tag reaches the output.
	assert.NotContains(t, string(actual), "<script>")
}

func TestConvertRawCell(t *testing.T) {
expected := template.HTML(`<pre>This is a raw section, without formatting.
This is the second line.</pre>`)
actual := convertRawCell(testRawCell)
assert.Equal(t, expected, actual)
}

func TestConvertRawCellCodeInjection(t *testing.T) {
expected := template.HTML(`<pre>This is a raw section, without formatting.
This is the second line.
&lt;script&gt;window.alert(&#39;I&#39;m evil!&#39;);&lt;/script&gt;</pre>`)
actual := convertRawCell(testRawCellCodeInjection)
assert.Equal(t, expected, actual)
}
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ require (
github.com/buildkite/terminal-to-html v3.2.0+incompatible
github.com/dlclark/regexp2 v1.2.0 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/microcosm-cc/bluemonday v1.0.2
github.com/pkg/errors v0.9.1 // indirect
github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect
github.com/stretchr/testify v1.5.1
golang.org/x/net v0.0.0-20200319234117-63522dbf7eec // indirect
golang.org/x/sys v0.0.0-20200317113312-5766fd39f98d // indirect
gopkg.in/russross/blackfriday.v2 v2.0.1
)
Expand Down
Loading

0 comments on commit 061c5c7

Please sign in to comment.