From 8c55d0d8790587f2b73a912c4385b9ee7e2c32b7 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Mon, 15 Apr 2019 15:53:28 -0700 Subject: [PATCH 01/17] fix(html pipe): Sanitize generated markdown to avoid XSS attacks Properly escape the initial markdown and custom matchers to avoid potential XSS attacks via JS injection, and also avoid DOM clubbering fix #253 --- package-lock.json | 8 +++++++ src/utils/mdast-to-vdom.js | 1 + src/utils/sanitize-hast.js | 46 ++++++++++++++++++++++++++++++++++++ test/testHTMLFromMarkdown.js | 24 +++++++++++++++++++ test/testMdastToVDOM.js | 6 ++--- 5 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 src/utils/sanitize-hast.js diff --git a/package-lock.json b/package-lock.json index 5a44f3c47..fa910c119 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5361,6 +5361,14 @@ "zwitch": "^1.0.0" } }, + "hast-util-sanitize": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/hast-util-sanitize/-/hast-util-sanitize-1.3.0.tgz", + "integrity": "sha512-rQeetoD08jHmDOUYN6h9vTuE0hQN4wymhtkQZ6whHtcjaLpjw5RYAbcdxx9cMgMWERDsSs79UpqHuBLlUHKeOw==", + "requires": { + "xtend": "^4.0.1" + } + }, "hast-util-to-html": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/hast-util-to-html/-/hast-util-to-html-5.0.1.tgz", diff --git a/src/utils/mdast-to-vdom.js b/src/utils/mdast-to-vdom.js index ef68f76ac..1768f2516 100644 --- a/src/utils/mdast-to-vdom.js +++ b/src/utils/mdast-to-vdom.js @@ -19,6 +19,7 @@ const unified = require('unified'); const parse = require('rehype-parse'); const { JSDOM } = require('jsdom'); const HeadingHandler = require('./heading-handler'); +const sanitize = require('./sanitize-hast'); const image = require('./image-handler'); const embed = require('./embed-handler'); const link = require('./link-handler'); diff --git a/src/utils/sanitize-hast.js b/src/utils/sanitize-hast.js new file mode 100644 index 000000000..96ad0cede --- /dev/null +++ b/src/utils/sanitize-hast.js @@ -0,0 +1,46 @@ +/* + * Copyright 2019 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +const _ = require('lodash'); +const sanitize = require('hast-util-sanitize'); +const GITHUB_SCHEMA = require('hast-util-sanitize/lib/github'); + +// Define helix specific customizations on top of the default GitHub schema +const HELIX_SCHEMA_CUSTOMIZATIONS = { + tagNames: ['esi:include'], + attributes: { + 'esi:include': ['src'], + code: ['className'], + img: ['sizes', 'srcset'], + }, +}; + +/** + * A customizer function for lodash's mergeWith method that concats arrays. + * @param {*} obj The object to merge into + * @param {*} src The object to merge from + * @returns {Array} the concatenanted array or undefined if the object is not an array + */ +function arrayMergeCustomizer(obj, src) { + if (_.isArray(obj)) { + return obj.concat(src); + } + return undefined; +} + +// Define the sanitization schema for helix +const HELIX_SCHEMA = _.mergeWith( + GITHUB_SCHEMA, + HELIX_SCHEMA_CUSTOMIZATIONS, + arrayMergeCustomizer, +); + +module.exports = hast => sanitize(hast, HELIX_SCHEMA); diff --git a/test/testHTMLFromMarkdown.js b/test/testHTMLFromMarkdown.js index bf4f54033..ddc9ccd49 100644 --- a/test/testHTMLFromMarkdown.js +++ b/test/testHTMLFromMarkdown.js @@ -266,4 +266,28 @@ describe('Testing Markdown conversion', () => { `); }); + + it('XSS escape href attribute on links', async () => { + await assertMd(` + [Foo](javascript://%0Dalert('XSS!')) + [Bar](javascript:alert('XSS!')) + `, ` +

+ Foo + Bar +

+ `); + }); + + it('XSS escape href in images', async () => { + await assertMd(` + ![Foo](javascript://%0Dalert('XSS!')) + ![Bar](javascript:alert('XSS!')) + `, ` +

+ Foo + Bar +

+ `); + }); }); diff --git a/test/testMdastToVDOM.js b/test/testMdastToVDOM.js index 83d415299..5004d3971 100644 --- a/test/testMdastToVDOM.js +++ b/test/testMdastToVDOM.js @@ -94,10 +94,10 @@ describe('Test MDAST to VDOM Transformation', () => { it('Programmatic Matcher Function', () => { const mdast = fs.readJSONSync(path.resolve(__dirname, 'fixtures', 'simple.json')); const transformer = new VDOM(mdast, action.secrets); - transformer.match(({ type }) => type === 'heading', () => '

All Headings are the same to me

'); + transformer.match(({ type }) => type === 'heading', () => '

All Headings are the same to me

'); assertTransformerYieldsDocument( transformer, - '

All Headings are the same to me

', + '

All Headings are the same to me

', ); }); @@ -108,7 +108,7 @@ describe('Test MDAST to VDOM Transformation', () => { assertTransformerYieldsDocument( transformer, `
- +

All Headings are the same to me

`, ); From 65430d466a00776c674b3bcd48723161ca8f5d49 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Tue, 16 Apr 2019 05:31:50 -0700 Subject: [PATCH 02/17] feat(html pipe): add support for anchors on headings Fixes #26 --- src/utils/mdast-to-vdom.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/mdast-to-vdom.js b/src/utils/mdast-to-vdom.js index 1768f2516..9e7464ee7 100644 --- a/src/utils/mdast-to-vdom.js +++ b/src/utils/mdast-to-vdom.js @@ -20,6 +20,7 @@ const parse = require('rehype-parse'); const { JSDOM } = require('jsdom'); const HeadingHandler = require('./heading-handler'); const sanitize = require('./sanitize-hast'); +const HeadingHandler = require('./heading-handler'); const image = require('./image-handler'); const embed = require('./embed-handler'); const link = require('./link-handler'); From e2d79638a3d2e7d83254c18e418ad339c9d9592a Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Mon, 15 Apr 2019 15:53:28 -0700 Subject: [PATCH 03/17] fix(html pipe): Sanitize generated markdown to avoid XSS attacks Properly escape the initial markdown and custom matchers to avoid potential XSS attacks via JS injection, and also avoid DOM clubbering fix #253 --- src/utils/mdast-to-vdom.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/mdast-to-vdom.js b/src/utils/mdast-to-vdom.js index 9e7464ee7..352845440 100644 --- a/src/utils/mdast-to-vdom.js +++ b/src/utils/mdast-to-vdom.js @@ -21,6 +21,7 @@ const { JSDOM } = require('jsdom'); const HeadingHandler = require('./heading-handler'); const sanitize = require('./sanitize-hast'); const HeadingHandler = require('./heading-handler'); +const sanitize = require('./sanitize-hast'); const image = require('./image-handler'); const embed = require('./embed-handler'); const link = require('./link-handler'); From 4b83b7c79fb70cf533cbbcf333863a2a69763a78 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 19 Apr 2019 12:30:55 -0700 Subject: [PATCH 04/17] refactor(html pipe): Remove duplicate import Removing duplicate import fix #253 --- src/utils/mdast-to-vdom.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utils/mdast-to-vdom.js b/src/utils/mdast-to-vdom.js index 352845440..9e7464ee7 100644 --- a/src/utils/mdast-to-vdom.js +++ b/src/utils/mdast-to-vdom.js @@ -21,7 +21,6 @@ const { JSDOM } = require('jsdom'); const HeadingHandler = require('./heading-handler'); const sanitize = require('./sanitize-hast'); const HeadingHandler = require('./heading-handler'); -const sanitize = require('./sanitize-hast'); const image = require('./image-handler'); const embed = require('./embed-handler'); const link = require('./link-handler'); From 38dc1084f7a33021dc0212068cfac4f52c28cea4 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 19 Apr 2019 12:33:51 -0700 Subject: [PATCH 05/17] refactor(html pipe): Remove duplicate import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing duplicate import fix “253 --- src/utils/mdast-to-vdom.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utils/mdast-to-vdom.js b/src/utils/mdast-to-vdom.js index 9e7464ee7..1768f2516 100644 --- a/src/utils/mdast-to-vdom.js +++ b/src/utils/mdast-to-vdom.js @@ -20,7 +20,6 @@ const parse = require('rehype-parse'); const { JSDOM } = require('jsdom'); const HeadingHandler = require('./heading-handler'); const sanitize = require('./sanitize-hast'); -const HeadingHandler = require('./heading-handler'); const image = require('./image-handler'); const embed = require('./embed-handler'); const link = require('./link-handler'); From 71f23674d10588de992372e833f1eb41d2e81ae8 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 19 Apr 2019 15:56:26 -0700 Subject: [PATCH 06/17] refactor(html pipe): Sanitize output using DOMPurify Refactor to sanitize the output using DOMPurify in the after step of the pipeline instead of the previous alternative in the mdast to vdom step fix #253 --- package.json | 1 + src/defaults/html.pipe.js | 5 +++++ src/html/sanitize.js | 30 ++++++++++++++++++++++++++++++ src/utils/heading-handler.js | 2 +- src/utils/mdast-to-vdom.js | 1 - test/testHTMLFromMarkdown.js | 22 ++++++++++++++++------ test/testMdastToVDOM.js | 22 +++++++++++----------- 7 files changed, 64 insertions(+), 19 deletions(-) create mode 100644 src/html/sanitize.js diff --git a/package.json b/package.json index fb14073ce..d7eb1f6f8 100644 --- a/package.json +++ b/package.json @@ -54,6 +54,7 @@ "ajv": "^6.6.2", "callsites": "^3.0.0", "clone": "^2.1.2", + "dompurify": "^1.0.10", "fs-extra": "^8.0.0", "github-slugger": "^1.2.1", "hast-util-select": "^3.0.1", diff --git a/src/defaults/html.pipe.js b/src/defaults/html.pipe.js index 6ff8fb132..065cd6944 100644 --- a/src/defaults/html.pipe.js +++ b/src/defaults/html.pipe.js @@ -35,7 +35,11 @@ const rewriteLinks = require('../html/static-asset-links'); const tohast = require('../html/html-to-hast'); const tohtml = require('../html/stringify-hast'); const addHeaders = require('../html/add-headers'); +<<<<<<< HEAD const timing = require('../utils/timing'); +======= +const sanitize = require('../html/sanitize'); +>>>>>>> refactor(html pipe): Sanitize output using DOMPurify /* eslint newline-per-chained-call: off */ @@ -64,6 +68,7 @@ const htmlpipe = (cont, payload, action) => { .before(html).expose('html') .before(responsive) .once(cont) + .after(sanitize) .after(type('text/html')) .after(cache).when(uncached) .after(key) diff --git a/src/html/sanitize.js b/src/html/sanitize.js new file mode 100644 index 000000000..16d6eae05 --- /dev/null +++ b/src/html/sanitize.js @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +const createDOMPurify = require('dompurify'); +const { JSDOM } = require('jsdom'); + +const helixSanitizationConfig = { ADD_TAGS: ['esi:include'] }; + +function sanitize({ response }, { logger }) { + logger.log('debug', 'Sanitizing reponse body to avoid XSS injections.'); + + const { window } = (new JSDOM('')); + const DOMPurify = createDOMPurify(window); + const sanitizedBody = DOMPurify.sanitize(response.body, helixSanitizationConfig); + return { + response: { + body: sanitizedBody, + }, + }; +} + +module.exports = sanitize; diff --git a/src/utils/heading-handler.js b/src/utils/heading-handler.js index 78f5529f1..5c3b5b91b 100644 --- a/src/utils/heading-handler.js +++ b/src/utils/heading-handler.js @@ -45,7 +45,7 @@ class HeadingHandler { // Inject the id after transformation const n = Object.assign({}, node); const el = fallback(h, n); - el.properties.id = headingIdentifier; + el.properties.id = el.properties.id || `user-content-${headingIdentifier}`; return el; }; } diff --git a/src/utils/mdast-to-vdom.js b/src/utils/mdast-to-vdom.js index 1768f2516..ef68f76ac 100644 --- a/src/utils/mdast-to-vdom.js +++ b/src/utils/mdast-to-vdom.js @@ -19,7 +19,6 @@ const unified = require('unified'); const parse = require('rehype-parse'); const { JSDOM } = require('jsdom'); const HeadingHandler = require('./heading-handler'); -const sanitize = require('./sanitize-hast'); const image = require('./image-handler'); const embed = require('./embed-handler'); const link = require('./link-handler'); diff --git a/test/testHTMLFromMarkdown.js b/test/testHTMLFromMarkdown.js index ddc9ccd49..3cf2c8362 100644 --- a/test/testHTMLFromMarkdown.js +++ b/test/testHTMLFromMarkdown.js @@ -143,7 +143,7 @@ describe('Testing Markdown conversion', () => { Hello World `, ` -

Hello

+

Hello

Hello World\n
`); await assertMd( @@ -166,10 +166,10 @@ describe('Testing Markdown conversion', () => { > > bar `, ` -

Foo

+

Foo

bar

-

Foo

+

Foo

bar

`); @@ -190,7 +190,7 @@ describe('Testing Markdown conversion', () => { Hello World [link](Foo +

Foo

Hello World [link](<foobar)

`); }); @@ -201,7 +201,7 @@ describe('Testing Markdown conversion', () => { Hello World [link](foo bar) `, ` -

Foo

+

Foo

Hello World [link](foo bar)

`); }); @@ -212,7 +212,7 @@ describe('Testing Markdown conversion', () => { Hello World [link](λ) `, ` -

Foo

+

Foo

Hello World link

`); }); @@ -290,4 +290,14 @@ describe('Testing Markdown conversion', () => {

`); }); + + it('XSS escape DOM clobbering attributes', async () => { + await assertMd(` + # location + Foo + `, ` +

location

+

Foo

+ `); + }); }); diff --git a/test/testMdastToVDOM.js b/test/testMdastToVDOM.js index 5004d3971..84f143bcb 100644 --- a/test/testMdastToVDOM.js +++ b/test/testMdastToVDOM.js @@ -48,7 +48,7 @@ describe('Test MDAST to VDOM Transformation', () => { const mdast = fs.readJSONSync(path.resolve(__dirname, 'fixtures', 'simple.json')); assertTransformerYieldsDocument( new VDOM(mdast, action.secrets), - '

Hello World

', + '

Hello World

', ); }); @@ -70,14 +70,14 @@ describe('Test MDAST to VDOM Transformation', () => { const mdast = fs.readJSONSync(path.resolve(__dirname, 'fixtures', 'headings.json')); assertTransformerYieldsDocument( new VDOM(mdast, action.secrets), ` -

Heading 1 (double-underline)

-

Heading 2 (single-underline)

-

Heading 1

-

Heading 2

-

Heading 3

-

Heading 4

-
Heading 5
-
Heading 6
`, +

Heading 1 (double-underline)

+

Heading 2 (single-underline)

+

Heading 1

+

Heading 2

+

Heading 3

+

Heading 4

+
Heading 5
+
Heading 6
`, ); }); @@ -97,7 +97,7 @@ describe('Test MDAST to VDOM Transformation', () => { transformer.match(({ type }) => type === 'heading', () => '

All Headings are the same to me

'); assertTransformerYieldsDocument( transformer, - '

All Headings are the same to me

', + '

All Headings are the same to me

', ); }); @@ -108,7 +108,7 @@ describe('Test MDAST to VDOM Transformation', () => { assertTransformerYieldsDocument( transformer, `
- +

All Headings are the same to me

`, ); From 5d06d151f8f15e86bd103933fb440581d0ce2aad Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 19 Apr 2019 16:02:58 -0700 Subject: [PATCH 07/17] refactor(html pipe): Remove hast-util-sanitize based sanitization logic Remove the hast-util-sanitize based sanitization logic as this is now replaced by the DOMPurify pipeline after step fix #253 --- src/utils/sanitize-hast.js | 46 -------------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 src/utils/sanitize-hast.js diff --git a/src/utils/sanitize-hast.js b/src/utils/sanitize-hast.js deleted file mode 100644 index 96ad0cede..000000000 --- a/src/utils/sanitize-hast.js +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2019 Adobe. All rights reserved. - * This file is licensed to you under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. You may obtain a copy - * of the License at http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS - * OF ANY KIND, either express or implied. See the License for the specific language - * governing permissions and limitations under the License. - */ -const _ = require('lodash'); -const sanitize = require('hast-util-sanitize'); -const GITHUB_SCHEMA = require('hast-util-sanitize/lib/github'); - -// Define helix specific customizations on top of the default GitHub schema -const HELIX_SCHEMA_CUSTOMIZATIONS = { - tagNames: ['esi:include'], - attributes: { - 'esi:include': ['src'], - code: ['className'], - img: ['sizes', 'srcset'], - }, -}; - -/** - * A customizer function for lodash's mergeWith method that concats arrays. - * @param {*} obj The object to merge into - * @param {*} src The object to merge from - * @returns {Array} the concatenanted array or undefined if the object is not an array - */ -function arrayMergeCustomizer(obj, src) { - if (_.isArray(obj)) { - return obj.concat(src); - } - return undefined; -} - -// Define the sanitization schema for helix -const HELIX_SCHEMA = _.mergeWith( - GITHUB_SCHEMA, - HELIX_SCHEMA_CUSTOMIZATIONS, - arrayMergeCustomizer, -); - -module.exports = hast => sanitize(hast, HELIX_SCHEMA); From b33a5fa30f2a337612fe8fb78b5e0ee54a0a88e8 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 19 Apr 2019 16:14:43 -0700 Subject: [PATCH 08/17] test(html pipe): Remove unnecessary test modification Removing unnecessary modification to existing tests as the use case is covered by a separate dedicated test now fix #253 --- test/testMdastToVDOM.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testMdastToVDOM.js b/test/testMdastToVDOM.js index 84f143bcb..0055d6117 100644 --- a/test/testMdastToVDOM.js +++ b/test/testMdastToVDOM.js @@ -94,10 +94,10 @@ describe('Test MDAST to VDOM Transformation', () => { it('Programmatic Matcher Function', () => { const mdast = fs.readJSONSync(path.resolve(__dirname, 'fixtures', 'simple.json')); const transformer = new VDOM(mdast, action.secrets); - transformer.match(({ type }) => type === 'heading', () => '

All Headings are the same to me

'); + transformer.match(({ type }) => type === 'heading', () => '

All Headings are the same to me

'); assertTransformerYieldsDocument( transformer, - '

All Headings are the same to me

', + '

All Headings are the same to me

', ); }); From e5ae87d236cc66aa2c5e0a77e31c445399571490 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 26 Apr 2019 12:03:25 -0700 Subject: [PATCH 09/17] refactor(html pipe): Move sanitization step right before pre function execution Move the sanitization step right before the pre function execution so that author-generated content is properly sanitized, but sanitizing the developer-generated content is the responsibility of the template engine fix #253 --- src/defaults/html.pipe.js | 2 +- src/html/sanitize.js | 15 ++++++++++----- test/testEmbedHandler.js | 33 ++++++++++++++++++++++----------- test/testHTMLFromMarkdown.js | 10 +++++----- test/testMdastToVDOM.js | 14 +++++++------- 5 files changed, 45 insertions(+), 29 deletions(-) diff --git a/src/defaults/html.pipe.js b/src/defaults/html.pipe.js index 065cd6944..d9ae4b2e8 100644 --- a/src/defaults/html.pipe.js +++ b/src/defaults/html.pipe.js @@ -67,8 +67,8 @@ const htmlpipe = (cont, payload, action) => { .before(selecttest) .before(html).expose('html') .before(responsive) + .before(sanitize) .once(cont) - .after(sanitize) .after(type('text/html')) .after(cache).when(uncached) .after(key) diff --git a/src/html/sanitize.js b/src/html/sanitize.js index 16d6eae05..f51e8f684 100644 --- a/src/html/sanitize.js +++ b/src/html/sanitize.js @@ -12,17 +12,22 @@ const createDOMPurify = require('dompurify'); const { JSDOM } = require('jsdom'); -const helixSanitizationConfig = { ADD_TAGS: ['esi:include'] }; +const helixSanitizationConfig = { + ADD_TAGS: ['esi:include', 'esi:remove'], + RETURN_DOM: true, +}; -function sanitize({ response }, { logger }) { +function sanitize({ content }, { logger }) { logger.log('debug', 'Sanitizing reponse body to avoid XSS injections.'); const { window } = (new JSDOM('')); const DOMPurify = createDOMPurify(window); - const sanitizedBody = DOMPurify.sanitize(response.body, helixSanitizationConfig); + const sanitizedBody = DOMPurify.sanitize(content.document.body, helixSanitizationConfig); return { - response: { - body: sanitizedBody, + content: { + document: { + body: sanitizedBody, + }, }, }; } diff --git a/test/testEmbedHandler.js b/test/testEmbedHandler.js index 7639b1e5d..6f1c413f7 100644 --- a/test/testEmbedHandler.js +++ b/test/testEmbedHandler.js @@ -11,7 +11,8 @@ */ /* eslint-env mocha */ const assert = require('assert'); -const { Logger } = require('@adobe/helix-shared'); +const { Logger, dom: { assertEquivalentNode } } = require('@adobe/helix-shared'); +const { JSDOM } = require('jsdom'); const embed = require('../src/utils/embed-handler'); const { pipe } = require('../src/defaults/html.pipe.js'); const coerce = require('../src/utils/coerce-secrets'); @@ -121,11 +122,16 @@ https://www.youtube.com/watch?v=KOxbO0EI4MA assert.equal(result.response.status, 201); assert.equal(result.response.headers['Content-Type'], 'text/html'); - assert.equal(result.response.body, `

Hello World -Here comes an embed.

- -

https://www.youtube.com/watch?v=KOxbO0EI4MA

-

Easy!

`); + assertEquivalentNode( + new JSDOM(result.response.body).window.document.body, + new JSDOM(` +

Hello World + Here comes an embed.

+ +

https://www.youtube.com/watch?v=KOxbO0EI4MA

+

Easy!

+ `).window.document.body, + ); }); it('html.pipe processes internal embeds', async () => { @@ -154,10 +160,15 @@ Here comes an embed. assert.equal(result.response.status, 201); assert.equal(result.response.headers['Content-Type'], 'text/html'); - assert.equal(result.response.body, `

Hello World -Here comes an embed.

- -

./foo.md

-

Easy!

`); + assertEquivalentNode( + new JSDOM(result.response.body).window.document.body, + new JSDOM(` +

Hello World + Here comes an embed.

+ +

./foo.md

+

Easy!

+ `).window.document.body, + ); }); }); diff --git a/test/testHTMLFromMarkdown.js b/test/testHTMLFromMarkdown.js index 3cf2c8362..d1f839b00 100644 --- a/test/testHTMLFromMarkdown.js +++ b/test/testHTMLFromMarkdown.js @@ -222,11 +222,11 @@ describe('Testing Markdown conversion', () => { # Foo
- +
`, ` -

Foo

-
+

Foo

+
`); }); @@ -236,7 +236,7 @@ describe('Testing Markdown conversion', () => { Hello World [link](λ) `, ` -

Foo Bar

+

Foo Bar

Hello World link

`); }); @@ -297,7 +297,7 @@ describe('Testing Markdown conversion', () => { Foo `, `

location

-

Foo

+

Foo

`); }); }); diff --git a/test/testMdastToVDOM.js b/test/testMdastToVDOM.js index 0055d6117..3d154bc37 100644 --- a/test/testMdastToVDOM.js +++ b/test/testMdastToVDOM.js @@ -56,13 +56,13 @@ describe('Test MDAST to VDOM Transformation', () => { const mdast = fs.readJSONSync(path.resolve(__dirname, 'fixtures', 'heading-ids.json')); assertTransformerYieldsDocument( new VDOM(mdast, action.secrets), ` -

Foo

-

Bar

-

Baz

-

Qux

-

Bar

-

Bar-1

-

Foo Bar Baz

`, +

Foo

+

Bar

+

Baz

+

Qux

+

Bar

+

Bar-1

+

Foo Bar Baz

`, ); }); From c8fb43cd0f0f44bf4db523376f3e4a8293f35cd9 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 26 Apr 2019 15:47:15 -0700 Subject: [PATCH 10/17] test(html pipe): Pipeline should sanitize author-generated content The HTML pipeline should sanitize the author-generated content, but not the developer code as the templating engine will mostly do that. fix #253 --- test/testHTML.js | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/test/testHTML.js b/test/testHTML.js index 2c936ef2d..8caa8bade 100644 --- a/test/testHTML.js +++ b/test/testHTML.js @@ -11,7 +11,8 @@ */ /* eslint-env mocha */ const assert = require('assert'); -const { Logger } = require('@adobe/helix-shared'); +const { Logger, dom: { assertEquivalentNode } } = require('@adobe/helix-shared'); +const { JSDOM } = require('jsdom'); const fs = require('fs-extra'); const path = require('path'); const NodeHttpAdapter = require('@pollyjs/adapter-node-http'); @@ -688,4 +689,38 @@ ${context.content.document.body.innerHTML}`, .filter(e => !!e); assert.notEqual(found.length, 0); }); + + it('html.pipe sanitizes author-generated content, but not developer-generated code', async () => { + const result = await pipe( + ({ content }) => ({ + response: { + status: 200, + body: ` + ${content.document.body.innerHTML} + Baz + `, + }, + }), + { + request: crequest, + content: { + body: ` +# Foo + +[Bar](javascript:alert('XSS')) + `, + }, + }, + { + request: { params }, + secrets, + logger, + }, + ); + assert.equal(200, result.response.status); + assertEquivalentNode( + new JSDOM('

Foo

Bar

Baz').window.document.body, + new JSDOM(result.response.body).window.document.body, + ); + }); }); From bcbd1ff42b82512b64c223fa86704743d3b40bec Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 26 Apr 2019 15:52:07 -0700 Subject: [PATCH 11/17] refactor(html pipe): Apply PR feedback fix #253 --- src/html/sanitize.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/html/sanitize.js b/src/html/sanitize.js index f51e8f684..baac49e74 100644 --- a/src/html/sanitize.js +++ b/src/html/sanitize.js @@ -18,10 +18,10 @@ const helixSanitizationConfig = { }; function sanitize({ content }, { logger }) { - logger.log('debug', 'Sanitizing reponse body to avoid XSS injections.'); + logger.log('debug', 'Sanitizing content body to avoid XSS injections.'); - const { window } = (new JSDOM('')); - const DOMPurify = createDOMPurify(window); + const globalContext = (new JSDOM('')).window; + const DOMPurify = createDOMPurify(globalContext); const sanitizedBody = DOMPurify.sanitize(content.document.body, helixSanitizationConfig); return { content: { From 247a6b9d76634fadf7720e74a8e4a004b8a38e5f Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 3 May 2019 09:46:29 -0700 Subject: [PATCH 12/17] feat(html pipe): Allow custom elements and attributes in markdown Let the sanitization step accept custom elements and attributes added to the markdown fix #253 --- src/html/sanitize.js | 48 +++++++++++++++++++++++++++++++++++- src/utils/heading-handler.js | 2 +- test/testEmbedHandler.js | 4 +-- test/testHTML.js | 2 +- test/testHTMLFromMarkdown.js | 32 +++++++++++++++++------- 5 files changed, 74 insertions(+), 14 deletions(-) diff --git a/src/html/sanitize.js b/src/html/sanitize.js index baac49e74..b32c9d60c 100644 --- a/src/html/sanitize.js +++ b/src/html/sanitize.js @@ -13,15 +13,61 @@ const createDOMPurify = require('dompurify'); const { JSDOM } = require('jsdom'); const helixSanitizationConfig = { - ADD_TAGS: ['esi:include', 'esi:remove'], + // Allowing all ESI tags, see: https://www.w3.org/TR/esi-lang + ADD_TAGS: [ + 'esi:try', + 'esi:attempt', + 'esi:except', + + 'esi:choose', + 'esi:when', + 'esi:otherwise', + + 'esi:include', + 'esi:inline', + 'esi:remove', + + 'esi:vars', + 'esi:comment', + ], RETURN_DOM: true, }; +const CUSTOM_NAME_REGEX = /^\w+-\w+$/; + +/** + * Allow custom elements to be retained by the sanitization. + * + * @param {Object} DOMPurify the DOMPurify instance + */ +function allowCustomElements(DOMPurify) { + DOMPurify.addHook('uponSanitizeElement', (node, data) => { + if (node.nodeName && node.nodeName.match(CUSTOM_NAME_REGEX)) { + data.allowedTags[data.tagName] = true; // eslint-disable-line no-param-reassign + } + }); +} + +/** + * Allow custom attributes to be retained by the sanitization. + * + * @param {Object} DOMPurify the DOMPurify instance + */ +function allowCustomAttributes(DOMPurify) { + DOMPurify.addHook('uponSanitizeAttribute', (node, data) => { + if (data.attrName && data.attrName.match(CUSTOM_NAME_REGEX)) { + data.allowedAttributes[data.attrName] = true; // eslint-disable-line no-param-reassign + } + }); +} + function sanitize({ content }, { logger }) { logger.log('debug', 'Sanitizing content body to avoid XSS injections.'); const globalContext = (new JSDOM('')).window; const DOMPurify = createDOMPurify(globalContext); + allowCustomElements(DOMPurify); + allowCustomAttributes(DOMPurify); const sanitizedBody = DOMPurify.sanitize(content.document.body, helixSanitizationConfig); return { content: { diff --git a/src/utils/heading-handler.js b/src/utils/heading-handler.js index 5c3b5b91b..fb0f30bf7 100644 --- a/src/utils/heading-handler.js +++ b/src/utils/heading-handler.js @@ -45,7 +45,7 @@ class HeadingHandler { // Inject the id after transformation const n = Object.assign({}, node); const el = fallback(h, n); - el.properties.id = el.properties.id || `user-content-${headingIdentifier}`; + el.properties.id = el.properties.id || headingIdentifier; return el; }; } diff --git a/test/testEmbedHandler.js b/test/testEmbedHandler.js index 6f1c413f7..93bde3833 100644 --- a/test/testEmbedHandler.js +++ b/test/testEmbedHandler.js @@ -129,7 +129,7 @@ https://www.youtube.com/watch?v=KOxbO0EI4MA Here comes an embed.

https://www.youtube.com/watch?v=KOxbO0EI4MA

-

Easy!

+

Easy!

`).window.document.body, ); }); @@ -167,7 +167,7 @@ Here comes an embed. Here comes an embed.

./foo.md

-

Easy!

+

Easy!

`).window.document.body, ); }); diff --git a/test/testHTML.js b/test/testHTML.js index 8caa8bade..7f53fa51a 100644 --- a/test/testHTML.js +++ b/test/testHTML.js @@ -719,8 +719,8 @@ ${context.content.document.body.innerHTML}`, ); assert.equal(200, result.response.status); assertEquivalentNode( - new JSDOM('

Foo

Bar

Baz').window.document.body, new JSDOM(result.response.body).window.document.body, + new JSDOM('

Foo

Bar

Baz').window.document.body, ); }); }); diff --git a/test/testHTMLFromMarkdown.js b/test/testHTMLFromMarkdown.js index d1f839b00..529bfd4b6 100644 --- a/test/testHTMLFromMarkdown.js +++ b/test/testHTMLFromMarkdown.js @@ -143,7 +143,7 @@ describe('Testing Markdown conversion', () => { Hello World `, ` -

Hello

+

Hello

Hello World\n
`); await assertMd( @@ -166,10 +166,10 @@ describe('Testing Markdown conversion', () => { > > bar `, ` -

Foo

+

Foo

bar

-

Foo

+

Foo

bar

`); @@ -190,7 +190,7 @@ describe('Testing Markdown conversion', () => { Hello World [link](Foo +

Foo

Hello World [link](<foobar)

`); }); @@ -201,7 +201,7 @@ describe('Testing Markdown conversion', () => { Hello World [link](foo bar) `, ` -

Foo

+

Foo

Hello World [link](foo bar)

`); }); @@ -212,7 +212,7 @@ describe('Testing Markdown conversion', () => { Hello World [link](λ) `, ` -

Foo

+

Foo

Hello World link

`); }); @@ -225,7 +225,7 @@ describe('Testing Markdown conversion', () => { `, ` -

Foo

+

Foo

`); }); @@ -236,7 +236,7 @@ describe('Testing Markdown conversion', () => { Hello World [link](λ) `, ` -

Foo Bar

+

Foo Bar

Hello World link

`); }); @@ -296,8 +296,22 @@ describe('Testing Markdown conversion', () => { # location Foo `, ` -

location

+

location

Foo

`); }); + + it('Accept custom elements and attributes', async () => { + await assertMd(` + # Foo + Bar + Waldo + `, ` +

Foo

+

Bar

+

+ Waldo +

+ `); + }); }); From 85614e2ce86df1e721b2963e00a600ea4f18ef93 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 3 May 2019 09:54:35 -0700 Subject: [PATCH 13/17] test(html pipe): Fix test for DOM CLobbering Adjust tests after removal of the DOM Clobbering escape logic fix #253 --- test/testMdastToVDOM.js | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/testMdastToVDOM.js b/test/testMdastToVDOM.js index 3d154bc37..83d415299 100644 --- a/test/testMdastToVDOM.js +++ b/test/testMdastToVDOM.js @@ -48,7 +48,7 @@ describe('Test MDAST to VDOM Transformation', () => { const mdast = fs.readJSONSync(path.resolve(__dirname, 'fixtures', 'simple.json')); assertTransformerYieldsDocument( new VDOM(mdast, action.secrets), - '

Hello World

', + '

Hello World

', ); }); @@ -56,13 +56,13 @@ describe('Test MDAST to VDOM Transformation', () => { const mdast = fs.readJSONSync(path.resolve(__dirname, 'fixtures', 'heading-ids.json')); assertTransformerYieldsDocument( new VDOM(mdast, action.secrets), ` -

Foo

-

Bar

-

Baz

-

Qux

-

Bar

-

Bar-1

-

Foo Bar Baz

`, +

Foo

+

Bar

+

Baz

+

Qux

+

Bar

+

Bar-1

+

Foo Bar Baz

`, ); }); @@ -70,14 +70,14 @@ describe('Test MDAST to VDOM Transformation', () => { const mdast = fs.readJSONSync(path.resolve(__dirname, 'fixtures', 'headings.json')); assertTransformerYieldsDocument( new VDOM(mdast, action.secrets), ` -

Heading 1 (double-underline)

-

Heading 2 (single-underline)

-

Heading 1

-

Heading 2

-

Heading 3

-

Heading 4

-
Heading 5
-
Heading 6
`, +

Heading 1 (double-underline)

+

Heading 2 (single-underline)

+

Heading 1

+

Heading 2

+

Heading 3

+

Heading 4

+
Heading 5
+
Heading 6
`, ); }); From 042a59f5ec1f0aa6a980bf77d3dc11aa9089a4b3 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Fri, 10 May 2019 08:43:44 -0700 Subject: [PATCH 14/17] docs(html pipe): Add ADR for heading identifiers sanitization Adding an architecture decision record for the sanitization of heading identifiers that would cause DOM clobbering fix #253 --- docs/adr/heading-identifiers.md | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 docs/adr/heading-identifiers.md diff --git a/docs/adr/heading-identifiers.md b/docs/adr/heading-identifiers.md new file mode 100644 index 000000000..6c5ec5ee3 --- /dev/null +++ b/docs/adr/heading-identifiers.md @@ -0,0 +1,39 @@ +Automatic Headings Identifier Generation +======================================== + +||| +|-|- +|Status | :green_book:`ACCEPTED` +|Issue | [#26], [#253] + +> TL;DR: a short Y-statement summarizing the decision + +In the context of generating heading identifiers for easier URL targeting, +facing the concern that some of them could end up clobbering the DOM +we decided to sanitize the offending identifiers +to achieve proper protection of the DOM, +accepting that in limited use cases where the autor would need to expose a heading that clobbers, that heading would just not get an identifier. + +Context +------- + +> Describe the detailed context and problem being addressed +Automatic heading identifier generation is extremely useful for authors so they can properly share targeted links to the content in their pages. + +Unfortunately, HTML element identifiers are usually exposed on the DOM `document` element for legacy reasons. In some edge cases, the identifier can then end up colliding with existing DOM properties (i.e. `name`, `children`, `location`, etc.) Most modern browsers will protect the DOM API and set these properties as read-only, but some browsers are known to let the properties being overridden. This can become an attack vector for XSS attacks. + +Decision +-------- + +In order to avoid DOM clobbering and reduce the possibility of an XSS attack through that vector, the decision is being made to sanitize the offending heading identifiers by just not exposing them in the DOM. The DOMPurify library that is used to sanitize the whole DOM will automatically remove all `id` properties on the headings that would cause a collision on the DOM properties. + +Consequences +------------ + +All headings will have an automatically generated identifier in the DOM, except those that would clobber the DOM. +The latter will just be output without the `id` attribute. + +If the author still needs a proper identifier being exposed in the DOM for easier navigation (which would be the case on API documentation pages), the proposed solutions are: +- Adding a prefix to the heading (i.e. `Event: OnClick`, `Property: Name`) +- Use a custom matcher function that injects a prefix in the identifier in the HTML (i.e. `

OnClick

`, `

Name

`) +- Use a custom JavaScript function that generates missing identifiers on headings clientside \ No newline at end of file From 6abb2183d3c4ce1064bb040bc8783921b2da9fb8 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Tue, 28 May 2019 10:11:18 -0700 Subject: [PATCH 15/17] fix(html pipe): Fix merge conflict Fix a merge conflict from a broken rebase operation fix #253 --- src/defaults/html.pipe.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/defaults/html.pipe.js b/src/defaults/html.pipe.js index d9ae4b2e8..670fc2ca7 100644 --- a/src/defaults/html.pipe.js +++ b/src/defaults/html.pipe.js @@ -35,11 +35,8 @@ const rewriteLinks = require('../html/static-asset-links'); const tohast = require('../html/html-to-hast'); const tohtml = require('../html/stringify-hast'); const addHeaders = require('../html/add-headers'); -<<<<<<< HEAD const timing = require('../utils/timing'); -======= const sanitize = require('../html/sanitize'); ->>>>>>> refactor(html pipe): Sanitize output using DOMPurify /* eslint newline-per-chained-call: off */ From 95f5899143c1d6b298b3616ce8dd16e2d4e59270 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Tue, 28 May 2019 10:35:16 -0700 Subject: [PATCH 16/17] refactor(html pipe): Adapt sanitizing logic to middleware pattern Update the sanitization logic to leverage the newly introduced middleware pattern fix #253 --- src/html/sanitize.js | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/html/sanitize.js b/src/html/sanitize.js index b32c9d60c..cc96d2131 100644 --- a/src/html/sanitize.js +++ b/src/html/sanitize.js @@ -69,13 +69,7 @@ function sanitize({ content }, { logger }) { allowCustomElements(DOMPurify); allowCustomAttributes(DOMPurify); const sanitizedBody = DOMPurify.sanitize(content.document.body, helixSanitizationConfig); - return { - content: { - document: { - body: sanitizedBody, - }, - }, - }; + content.document.body = sanitizedBody; } module.exports = sanitize; From 2c8ea4abb5210c009991804bb75c060eb17e5ad3 Mon Sep 17 00:00:00 2001 From: Julien Ramboz Date: Tue, 28 May 2019 10:45:07 -0700 Subject: [PATCH 17/17] test(html pipe): Fix XSS tests for simplified pipeline step executor Fix XSS tests so the match the new simplified pipeline step executor logic fix #253 --- test/testHTML.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/testHTML.js b/test/testHTML.js index 7f53fa51a..388c01ce9 100644 --- a/test/testHTML.js +++ b/test/testHTML.js @@ -692,15 +692,15 @@ ${context.content.document.body.innerHTML}`, it('html.pipe sanitizes author-generated content, but not developer-generated code', async () => { const result = await pipe( - ({ content }) => ({ - response: { + (context) => { + context.response = { status: 200, body: ` - ${content.document.body.innerHTML} + ${context.content.document.body.innerHTML} Baz `, - }, - }), + }; + }, { request: crequest, content: {