diff --git a/docs/adr/heading-identifiers.md b/docs/adr/heading-identifiers.md new file mode 100644 index 000000000..6c5ec5ee3 --- /dev/null +++ b/docs/adr/heading-identifiers.md @@ -0,0 +1,39 @@ +Automatic Headings Identifier Generation +======================================== + +||| +|-|- +|Status | :green_book:`ACCEPTED` +|Issue | [#26], [#253] + +> TL;DR: a short Y-statement summarizing the decision + +In the context of generating heading identifiers for easier URL targeting, +facing the concern that some of them could end up clobbering the DOM +we decided to sanitize the offending identifiers +to achieve proper protection of the DOM, +accepting that in limited use cases where the autor would need to expose a heading that clobbers, that heading would just not get an identifier. + +Context +------- + +> Describe the detailed context and problem being addressed +Automatic heading identifier generation is extremely useful for authors so they can properly share targeted links to the content in their pages. + +Unfortunately, HTML element identifiers are usually exposed on the DOM `document` element for legacy reasons. In some edge cases, the identifier can then end up colliding with existing DOM properties (i.e. `name`, `children`, `location`, etc.) Most modern browsers will protect the DOM API and set these properties as read-only, but some browsers are known to let the properties being overridden. This can become an attack vector for XSS attacks. + +Decision +-------- + +In order to avoid DOM clobbering and reduce the possibility of an XSS attack through that vector, the decision is being made to sanitize the offending heading identifiers by just not exposing them in the DOM. The DOMPurify library that is used to sanitize the whole DOM will automatically remove all `id` properties on the headings that would cause a collision on the DOM properties. + +Consequences +------------ + +All headings will have an automatically generated identifier in the DOM, except those that would clobber the DOM. +The latter will just be output without the `id` attribute. + +If the author still needs a proper identifier being exposed in the DOM for easier navigation (which would be the case on API documentation pages), the proposed solutions are: +- Adding a prefix to the heading (i.e. `Event: OnClick`, `Property: Name`) +- Use a custom matcher function that injects a prefix in the identifier in the HTML (i.e. `

OnClick

`, `

Name

`) +- Use a custom JavaScript function that generates missing identifiers on headings clientside \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 5a44f3c47..fa910c119 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5361,6 +5361,14 @@ "zwitch": "^1.0.0" } }, + "hast-util-sanitize": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/hast-util-sanitize/-/hast-util-sanitize-1.3.0.tgz", + "integrity": "sha512-rQeetoD08jHmDOUYN6h9vTuE0hQN4wymhtkQZ6whHtcjaLpjw5RYAbcdxx9cMgMWERDsSs79UpqHuBLlUHKeOw==", + "requires": { + "xtend": "^4.0.1" + } + }, "hast-util-to-html": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/hast-util-to-html/-/hast-util-to-html-5.0.1.tgz", diff --git a/package.json b/package.json index fb14073ce..d7eb1f6f8 100644 --- a/package.json +++ b/package.json @@ -54,6 +54,7 @@ "ajv": "^6.6.2", "callsites": "^3.0.0", "clone": "^2.1.2", + "dompurify": "^1.0.10", "fs-extra": "^8.0.0", "github-slugger": "^1.2.1", "hast-util-select": "^3.0.1", diff --git a/src/defaults/html.pipe.js b/src/defaults/html.pipe.js index 6ff8fb132..670fc2ca7 100644 --- a/src/defaults/html.pipe.js +++ b/src/defaults/html.pipe.js @@ -36,6 +36,7 @@ const tohast = require('../html/html-to-hast'); const tohtml = require('../html/stringify-hast'); const addHeaders = require('../html/add-headers'); const timing = require('../utils/timing'); +const sanitize = require('../html/sanitize'); /* eslint newline-per-chained-call: off */ @@ -63,6 +64,7 @@ const htmlpipe = (cont, payload, action) => { .before(selecttest) .before(html).expose('html') .before(responsive) + .before(sanitize) .once(cont) .after(type('text/html')) .after(cache).when(uncached) diff --git a/src/html/sanitize.js b/src/html/sanitize.js new file mode 100644 index 000000000..cc96d2131 --- /dev/null +++ b/src/html/sanitize.js @@ -0,0 +1,75 @@ +/* + * Copyright 2019 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +const createDOMPurify = require('dompurify'); +const { JSDOM } = require('jsdom'); + +const helixSanitizationConfig = { + // Allowing all ESI tags, see: https://www.w3.org/TR/esi-lang + ADD_TAGS: [ + 'esi:try', + 'esi:attempt', + 'esi:except', + + 'esi:choose', + 'esi:when', + 'esi:otherwise', + + 'esi:include', + 'esi:inline', + 'esi:remove', + + 'esi:vars', + 'esi:comment', + ], + RETURN_DOM: true, +}; + +const CUSTOM_NAME_REGEX = /^\w+-\w+$/; + +/** + * Allow custom elements to be retained by the sanitization. + * + * @param {Object} DOMPurify the DOMPurify instance + */ +function allowCustomElements(DOMPurify) { + DOMPurify.addHook('uponSanitizeElement', (node, data) => { + if (node.nodeName && node.nodeName.match(CUSTOM_NAME_REGEX)) { + data.allowedTags[data.tagName] = true; // eslint-disable-line no-param-reassign + } + }); +} + +/** + * Allow custom attributes to be retained by the sanitization. + * + * @param {Object} DOMPurify the DOMPurify instance + */ +function allowCustomAttributes(DOMPurify) { + DOMPurify.addHook('uponSanitizeAttribute', (node, data) => { + if (data.attrName && data.attrName.match(CUSTOM_NAME_REGEX)) { + data.allowedAttributes[data.attrName] = true; // eslint-disable-line no-param-reassign + } + }); +} + +function sanitize({ content }, { logger }) { + logger.log('debug', 'Sanitizing content body to avoid XSS injections.'); + + const globalContext = (new JSDOM('')).window; + const DOMPurify = createDOMPurify(globalContext); + allowCustomElements(DOMPurify); + allowCustomAttributes(DOMPurify); + const sanitizedBody = DOMPurify.sanitize(content.document.body, helixSanitizationConfig); + content.document.body = sanitizedBody; +} + +module.exports = sanitize; diff --git a/src/utils/heading-handler.js b/src/utils/heading-handler.js index 78f5529f1..fb0f30bf7 100644 --- a/src/utils/heading-handler.js +++ b/src/utils/heading-handler.js @@ -45,7 +45,7 @@ class HeadingHandler { // Inject the id after transformation const n = Object.assign({}, node); const el = fallback(h, n); - el.properties.id = headingIdentifier; + el.properties.id = el.properties.id || headingIdentifier; return el; }; } diff --git a/test/testEmbedHandler.js b/test/testEmbedHandler.js index 7639b1e5d..93bde3833 100644 --- a/test/testEmbedHandler.js +++ b/test/testEmbedHandler.js @@ -11,7 +11,8 @@ */ /* eslint-env mocha */ const assert = require('assert'); -const { Logger } = require('@adobe/helix-shared'); +const { Logger, dom: { assertEquivalentNode } } = require('@adobe/helix-shared'); +const { JSDOM } = require('jsdom'); const embed = require('../src/utils/embed-handler'); const { pipe } = require('../src/defaults/html.pipe.js'); const coerce = require('../src/utils/coerce-secrets'); @@ -121,11 +122,16 @@ https://www.youtube.com/watch?v=KOxbO0EI4MA assert.equal(result.response.status, 201); assert.equal(result.response.headers['Content-Type'], 'text/html'); - assert.equal(result.response.body, `

Hello World -Here comes an embed.

- -

https://www.youtube.com/watch?v=KOxbO0EI4MA

-

Easy!

`); + assertEquivalentNode( + new JSDOM(result.response.body).window.document.body, + new JSDOM(` +

Hello World + Here comes an embed.

+ +

https://www.youtube.com/watch?v=KOxbO0EI4MA

+

Easy!

+ `).window.document.body, + ); }); it('html.pipe processes internal embeds', async () => { @@ -154,10 +160,15 @@ Here comes an embed. assert.equal(result.response.status, 201); assert.equal(result.response.headers['Content-Type'], 'text/html'); - assert.equal(result.response.body, `

Hello World -Here comes an embed.

- -

./foo.md

-

Easy!

`); + assertEquivalentNode( + new JSDOM(result.response.body).window.document.body, + new JSDOM(` +

Hello World + Here comes an embed.

+ +

./foo.md

+

Easy!

+ `).window.document.body, + ); }); }); diff --git a/test/testHTML.js b/test/testHTML.js index 2c936ef2d..388c01ce9 100644 --- a/test/testHTML.js +++ b/test/testHTML.js @@ -11,7 +11,8 @@ */ /* eslint-env mocha */ const assert = require('assert'); -const { Logger } = require('@adobe/helix-shared'); +const { Logger, dom: { assertEquivalentNode } } = require('@adobe/helix-shared'); +const { JSDOM } = require('jsdom'); const fs = require('fs-extra'); const path = require('path'); const NodeHttpAdapter = require('@pollyjs/adapter-node-http'); @@ -688,4 +689,38 @@ ${context.content.document.body.innerHTML}`, .filter(e => !!e); assert.notEqual(found.length, 0); }); + + it('html.pipe sanitizes author-generated content, but not developer-generated code', async () => { + const result = await pipe( + (context) => { + context.response = { + status: 200, + body: ` + ${context.content.document.body.innerHTML} + Baz + `, + }; + }, + { + request: crequest, + content: { + body: ` +# Foo + +[Bar](javascript:alert('XSS')) + `, + }, + }, + { + request: { params }, + secrets, + logger, + }, + ); + assert.equal(200, result.response.status); + assertEquivalentNode( + new JSDOM(result.response.body).window.document.body, + new JSDOM('

Foo

Bar

Baz').window.document.body, + ); + }); }); diff --git a/test/testHTMLFromMarkdown.js b/test/testHTMLFromMarkdown.js index bf4f54033..529bfd4b6 100644 --- a/test/testHTMLFromMarkdown.js +++ b/test/testHTMLFromMarkdown.js @@ -222,11 +222,11 @@ describe('Testing Markdown conversion', () => { # Foo
- +
`, `

Foo

-
+
`); }); @@ -266,4 +266,52 @@ describe('Testing Markdown conversion', () => { `); }); + + it('XSS escape href attribute on links', async () => { + await assertMd(` + [Foo](javascript://%0Dalert('XSS!')) + [Bar](javascript:alert('XSS!')) + `, ` +

+ Foo + Bar +

+ `); + }); + + it('XSS escape href in images', async () => { + await assertMd(` + ![Foo](javascript://%0Dalert('XSS!')) + ![Bar](javascript:alert('XSS!')) + `, ` +

+ Foo + Bar +

+ `); + }); + + it('XSS escape DOM clobbering attributes', async () => { + await assertMd(` + # location + Foo + `, ` +

location

+

Foo

+ `); + }); + + it('Accept custom elements and attributes', async () => { + await assertMd(` + # Foo + Bar + Waldo + `, ` +

Foo

+

Bar

+

+ Waldo +

+ `); + }); });