From 7766fd266048c17f3b7ede5678297784de566f35 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Thu, 11 Mar 2021 18:10:09 -0500 Subject: [PATCH] unsafe html should be a breaking flaw (#3192) * Unsafe HTML should be a breaking flaw Part of #3177 * tests * undo unnecessary if statement * restore newline * Update testing/content/files/en-us/web/unsafe_html/index.html * Update testing/content/files/en-us/web/unsafe_html/index.html Co-authored-by: Ryan Johnson --- build/constants.js | 1 + build/flaws.js | 198 ++++++++++++------ client/src/document/toolbar/flaws.tsx | 32 +++ client/src/document/types.tsx | 7 + client/src/flaw-utils.js | 1 + kumascript/src/api/util.js | 16 -- kumascript/src/api/wiki.js | 1 - .../files/en-us/web/unsafe_html/index.html | 34 +++ testing/tests/index.test.js | 19 ++ 9 files changed, 226 insertions(+), 83 deletions(-) create mode 100644 testing/content/files/en-us/web/unsafe_html/index.html diff --git a/build/constants.js b/build/constants.js index 3e6ebe628c1d..9a8e6d6bb941 100644 --- a/build/constants.js +++ b/build/constants.js @@ -31,6 +31,7 @@ const VALID_FLAW_CHECKS = new Set([ "bad_pre_tags", "sectioning", "heading_links", + "unsafe_html", ]); // TODO (far future): Switch to "error" when number of flaws drops. diff --git a/build/flaws.js b/build/flaws.js index a10ccdbd9e5b..f3ce302e7692 100644 --- a/build/flaws.js +++ b/build/flaws.js @@ -12,7 +12,11 @@ const imageminSvgo = require("imagemin-svgo"); const sanitizeFilename = require("sanitize-filename"); const { Archive, Document, Redirect, Image } = require("../content"); -const { FLAW_LEVELS } = require("./constants"); +const { FLAW_LEVELS, VALID_FLAW_CHECKS } = require("./constants"); +const { + INTERACTIVE_EXAMPLES_BASE_URL, + LIVE_SAMPLES_BASE_URL, +} = require("../kumascript/src/constants"); const { packageBCD } = require("./resolve-bcd"); const { findMatchesInText, @@ -22,26 +26,134 @@ const { const { humanFileSize } = require("./utils"); const { VALID_MIME_TYPES } = require("../filecheck/constants"); -function injectFlaws(doc, $, options, { rawContent }) { +function injectFlaws(doc, $, options, document) { if (doc.isArchive) return; - injectBrokenLinksFlaws( - options.flawLevels.get("broken_links"), - doc, - $, - rawContent - ); + const flawChecks = [ + ["unsafe_html", injectUnsafeHTMLFlaws, false], + ["broken_links", injectBrokenLinksFlaws, true], + ["bad_bcd_queries", injectBadBCDQueriesFlaws, false], + ["bad_pre_tags", injectPreTagFlaws, false], + ["heading_links", injectHeadingLinksFlaws, false], + ]; + + // Note that some flaw checking functions need to always run. Even if we're not + // recording the flaws, the checks that it does are important for regular + // building. + + for (const [flawName, func, alwaysRun] of flawChecks) { + // Sanity check the list of flaw names that they're all recognized. + // Basically a cheap enum check. + if (!VALID_FLAW_CHECKS.has(flawName)) { + throw new Error(`'${flawName}' is not a valid flaw check name`); + } - injectBadBCDQueriesFlaws(options.flawLevels.get("bad_bcd_queries"), doc, $); + const level = options.flawLevels.get(flawName); + if (!alwaysRun && level === FLAW_LEVELS.IGNORE) { + continue; + } - injectPreTagFlaws(options.flawLevels.get("bad_pre_tags"), doc, $, rawContent); + // The flaw injection function will mutate the `doc.flaws` object. + func(doc, $, document, level); - injectHeadingLinksFlaws( - options.flawLevels.get("heading_links"), - doc, - $, - rawContent - ); + if ( + level === FLAW_LEVELS.ERROR && + doc.flaws[flawName] && + doc.flaws[flawName].length > 0 + ) { + throw new Error( + `${flawName} flaws: ${doc.flaws[flawName].map((f) => f.explanation)}` + ); + } + } +} + +function injectUnsafeHTMLFlaws(doc, $, { rawContent }) { + function addFlaw(element, explanation) { + if (!("unsafe_html" in doc.flaws)) { + doc.flaws.unsafe_html = []; + } + const id = `unsafe_html${doc.flaws.unsafe_html.length + 1}`; + let html = $.html($(element)); + $(element).replaceWith($("").addClass("unsafe-html").text(html)); + // Some nasty tags are so broken they can make the HTML become more or less + // the whole page. E.g. ``. + if (html.length > 100) { + html = html.slice(0, Math.min(html.indexOf("\n"), 100)) + "…"; + } + // Perhaps in the future we can make it possibly fixable to delete it. + const fixable = false; + const suggestion = null; + + const flaw = { + explanation, + id, + fixable, + html, + suggestion, + }; + for (const { line, column } of findMatchesInText(html, rawContent)) { + // This might not find anything because the HTML might have mutated + // slightly because of how cheerio parses it. But it doesn't hurt to try. + flaw.line = line; + flaw.column = column; + } + + doc.flaws.unsafe_html.push(flaw); + } + + const safeIFrameSrcs = [ + LIVE_SAMPLES_BASE_URL.toLowerCase(), + INTERACTIVE_EXAMPLES_BASE_URL.toLowerCase(), + // EmbedGHLiveSample.ejs + "https://mdn.github.io", + // EmbedYouTube.ejs + "https://www.youtube-nocookie.com", + // JSFiddleEmbed.ejs + "https://jsfiddle.net", + // EmbedTest262ReportResultsTable.ejs + "https://test262.report", + ]; + + $("script, embed, object, iframe").each((i, element) => { + const { tagName } = element; + if (tagName === "iframe") { + // For iframes we only check the 'src' value + const src = $(element).attr("src"); + if (!safeIFrameSrcs.find((s) => src.toLowerCase().startsWith(s))) { + addFlaw(element, `Unsafe + +

Here's a link that contains the string :JavaScript within the href +attribute:
+ + A beginner's guide to SpiderMonkey, Mozilla's JavaScript engine +

+ +
    +
  • I'm
  • +
  • sneaky
  • +
+ + diff --git a/testing/tests/index.test.js b/testing/tests/index.test.js index e2545ab2ed52..1ba331ca4dd9 100644 --- a/testing/tests/index.test.js +++ b/testing/tests/index.test.js @@ -1280,3 +1280,22 @@ test("'lang' attribute should match the article", () => { expect($("html").attr("lang")).toBe("en-US"); expect($("article").attr("lang")).toBe("en-US"); }); + +test("unsafe HTML gets flagged as flaws and replace with its raw HTML", () => { + const builtFolder = path.join( + buildRoot, + "en-us", + "docs", + "web", + "unsafe_html" + ); + + const jsonFile = path.join(builtFolder, "index.json"); + const { doc } = JSON.parse(fs.readFileSync(jsonFile)); + expect(doc.flaws.unsafe_html.length).toBe(5); + + const htmlFile = path.join(builtFolder, "index.html"); + const html = fs.readFileSync(htmlFile, "utf-8"); + const $ = cheerio.load(html); + expect($("code.unsafe-html").length).toBe(5); +});