diff --git a/CHANGELOG.md b/CHANGELOG.md index c9bc9e7..4861e0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 2.3.1 (2021-01-22): +- Uses the standard WHATWG URL parser to stop IDNA (Internationalized Domain Name) attacks on the iframe hostname validator. Thanks to Ron Masas of Checkmarx for pointing out the issue and suggesting the use of the WHATWG parser. + ## 2.3.0 (2020-12-16): - Upgrades `htmlparser2` to new major version `^6.0.0`. Thanks to [Bogdan Chadkin](https://github.com/TrySound) for the contribution. diff --git a/index.js b/index.js index e4d48ef..d37ee99 100644 --- a/index.js +++ b/index.js @@ -5,7 +5,6 @@ const { isPlainObject } = require('is-plain-object'); const deepmerge = require('deepmerge'); const parseSrcset = require('parse-srcset'); const { parse: postcssParse } = require('postcss'); -const url = require('url'); // Tags that can conceivably represent stand-alone media. const mediaTags = [ 'img', 'audio', 'video', 'picture', 'svg', @@ -305,12 +304,24 @@ function sanitizeHtml(html, options, _recursing) { if (name === 'iframe' && a === 'src') { let allowed = true; try { + if (value.startsWith('relative:')) { + // An attempt to exploit our workaround for base URLs being + // mandatory for relative URL validation in the WHATWG + // URL parser, reject it + throw new Error('relative: exploit attempt'); + } // naughtyHref is in charge of whether protocol relative URLs - // are cool. We should just accept them - // TODO: Replace deprecated `url.parse` - // eslint-disable-next-line node/no-deprecated-api - parsed = url.parse(value, false, true); - const isRelativeUrl = parsed && parsed.host === null && parsed.protocol === null; + // are cool. Here we are concerned just with allowed hostnames and + // whether to allow relative URLs. + // + // Build a placeholder "base URL" against which any reasonable + // relative URL may be parsed successfully + let base = 'relative://relative-site'; + for (let i = 0; (i < 100); i++) { + base += `/${i}`; + } + const parsed = new URL(value, base); + const isRelativeUrl = parsed && parsed.hostname === 'relative-site' && parsed.protocol === 'relative:'; if (isRelativeUrl) { // default value of allowIframeRelativeUrls is true // unless allowedIframeHostnames or allowedIframeDomains specified diff --git a/test/test.js b/test/test.js index d83ffdc..634ada4 100644 --- a/test/test.js +++ b/test/test.js @@ -1210,4 +1210,63 @@ describe('sanitizeHtml', function() { '
nested text
' ); }); + it('should not allow simple append attacks on iframe hostname validation', function() { + assert.equal( + sanitizeHtml('' + ); + }); + it('should not allow IDNA (Internationalized Domain Name) iframe validation bypass attacks', function() { + assert.equal( + sanitizeHtml('' + ); + }); + it('should parse path-rooted relative URLs sensibly', function() { + assert.equal( + sanitizeHtml(''), + '' + ); + }); + it('should parse bare relative URLs sensibly', function() { + assert.equal( + sanitizeHtml(''), + '' + ); + }); + it('should parse ../ relative URLs sensibly', function() { + assert.equal( + sanitizeHtml(''), + '' + ); + }); + it('should parse protocol relative URLs sensibly', function() { + assert.equal( + sanitizeHtml(''), + '' + ); + }); + it('should reject attempts to hack our use of a relative: protocol in our test base URL', function() { + assert.equal( + sanitizeHtml('' + ); + }); });