From f72618bf577da36979face46b5c4f7786873b2f1 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Tue, 16 Mar 2021 13:17:06 -0400 Subject: [PATCH] Broken link checkers should fallback to en-US if possible (#3254) * Broken link checkers should fallback nicely to English if need be Fixes #3104 * remove debugging * adding tests --- build/flaws.js | 73 ++++++++++++++++--- kumascript/src/api/web.js | 2 +- testing/tests/index.test.js | 26 +++++++ .../files/fr/web/foo/index.html | 12 +++ 4 files changed, 103 insertions(+), 10 deletions(-) diff --git a/build/flaws.js b/build/flaws.js index 3f916527c532..6426a47daa74 100644 --- a/build/flaws.js +++ b/build/flaws.js @@ -25,6 +25,7 @@ const { } = require("./matches-in-text"); const { humanFileSize } = require("./utils"); const { VALID_MIME_TYPES } = require("../filecheck/constants"); +const { DEFAULT_LOCALE } = require("../libs/constants"); function injectFlaws(doc, $, options, document) { if (doc.isArchive) return; @@ -204,21 +205,37 @@ function injectBrokenLinksFlaws(doc, $, { rawContent }, level) { // us from calling `findMatchesInText()` more than once. const matches = new Map(); + function mutateLink($element, suggestion, enUSFallback) { + if (suggestion) { + $element.attr("href", suggestion); + } else if (enUSFallback) { + $element.attr("href", enUSFallback); + // This functionality here should match what we do inside + // the `web.smartLink()` function in kumascript rendering. + $element.text(`${$element.text()} (${DEFAULT_LOCALE})`); + $element.addClass("only-in-en-us"); + $element.attr("title", "Currently only available in English (US)"); + } else { + throw new Error("Don't use this function if neither is true"); + } + } + // A closure function to help making it easier to append flaws function addBrokenLink( $element, index, href, suggestion = null, - explanation + explanation = null, + enUSFallback = null ) { if (level === FLAW_LEVELS.IGNORE) { // Note, even if not interested in flaws, we still need to apply the // suggestion. For example, in production builds, we don't care about // logging flaws, but because not all `broken_links` flaws have been // manually fixed at the source. - if (suggestion) { - $element.attr("href", suggestion); + if (suggestion || enUSFallback) { + mutateLink($element, suggestion, enUSFallback); } return; } @@ -245,10 +262,9 @@ function injectBrokenLinksFlaws(doc, $, { rawContent }, level) { doc.flaws.broken_links = []; } const id = `link${doc.flaws.broken_links.length + 1}`; - let fixable = false; - if (suggestion) { - $element.attr("href", suggestion); - fixable = true; + const fixable = !!suggestion; + if (suggestion || enUSFallback) { + mutateLink($element, suggestion, enUSFallback); } $element.attr("data-flaw", id); doc.flaws.broken_links.push( @@ -297,7 +313,8 @@ function injectBrokenLinksFlaws(doc, $, { rawContent }, level) { !Image.findByURL(hrefNormalized) && !Archive.isArchivedURL(hrefNormalized) ) { - // Before we give up, check if it's a redirect. + // Even if it's a redirect, it's still a flaw, but it'll be nice to + // know what it *should* be. const resolved = Redirect.resolve(hrefNormalized); if (resolved !== hrefNormalized) { addBrokenLink( @@ -307,7 +324,45 @@ function injectBrokenLinksFlaws(doc, $, { rawContent }, level) { resolved + absoluteURL.search + absoluteURL.hash.toLowerCase() ); } else { - addBrokenLink(a, checked.get(href), href); + let enUSFallbackURL = null; + // Test if the document is a translated document and the link isn't + // to an en-US URL. We know the link is broken (in this locale!) + // but it might be "salvageable" if we link the en-US equivalent. + // This is, by the way, the same trick the `web.smartLink()` utility + // function does in kumascript rendering. + if ( + doc.locale !== DEFAULT_LOCALE && + href.startsWith(`/${doc.locale}/`) + ) { + // What if you swich to the English link; would th link work + // better then? + const enUSHrefNormalized = hrefNormalized.replace( + `/${doc.locale}/`, + `/${DEFAULT_LOCALE}/` + ); + let enUSFound = Document.findByURL(enUSHrefNormalized); + if (enUSFound) { + enUSFallbackURL = enUSFound.url; + } else { + const enUSResolved = Redirect.resolve(enUSHrefNormalized); + if (enUSResolved !== enUSHrefNormalized) { + enUSFallbackURL = + enUSResolved + + absoluteURL.search + + absoluteURL.hash.toLowerCase(); + } + } + } + addBrokenLink( + a, + checked.get(href), + href, + null, + enUSFallbackURL + ? "Can use the English (en-US) link as a fallback" + : null, + enUSFallbackURL + ); } } // But does it have the correct case?! diff --git a/kumascript/src/api/web.js b/kumascript/src/api/web.js index 71e3c86c9e9a..f682e191e6f0 100644 --- a/kumascript/src/api/web.js +++ b/kumascript/src/api/web.js @@ -99,7 +99,7 @@ module.exports = { flawAttribute = ` data-flaw-src="${util.htmlEscape(flaw.macroSource)}"`; return ( '${content} (en-US)` ); } diff --git a/testing/tests/index.test.js b/testing/tests/index.test.js index 9ee58b5ebc2e..dcd76fe36236 100644 --- a/testing/tests/index.test.js +++ b/testing/tests/index.test.js @@ -1299,3 +1299,29 @@ test("unsafe HTML gets flagged as flaws and replace with its raw HTML", () => { const $ = cheerio.load(html); expect($("code.unsafe-html").length).toBe(6); }); + +test("translated content broken links can fall back to en-us", () => { + const builtFolder = path.join(buildRoot, "fr", "docs", "web", "foo"); + const jsonFile = path.join(builtFolder, "index.json"); + + // We should be able to read it and expect certain values + const { doc } = JSON.parse(fs.readFileSync(jsonFile)); + const map = new Map(doc.flaws.broken_links.map((x) => [x.href, x])); + expect(map.get("/fr/docs/Web/CSS/dumber").explanation).toBe( + "Can use the English (en-US) link as a fallback" + ); + expect(map.get("/fr/docs/Web/CSS/number").explanation).toBe( + "Can use the English (en-US) link as a fallback" + ); + + const htmlFile = path.join(builtFolder, "index.html"); + const html = fs.readFileSync(htmlFile, "utf-8"); + const $ = cheerio.load(html); + expect($('article a[href="/fr/docs/Web/CSS/dumber"]').length).toBe(0); + expect($('article a[href="/fr/docs/Web/CSS/number"]').length).toBe(0); + expect($('article a[href="/en-US/docs/Web/CSS/number"]').length).toBe(2); + expect($("article a.only-in-en-us").length).toBe(2); + expect($("article a.only-in-en-us").attr("title")).toBe( + "Currently only available in English (US)" + ); +}); diff --git a/testing/translated-content/files/fr/web/foo/index.html b/testing/translated-content/files/fr/web/foo/index.html index 2940c8b5458e..67a34f931c1f 100644 --- a/testing/translated-content/files/fr/web/foo/index.html +++ b/testing/translated-content/files/fr/web/foo/index.html @@ -10,3 +10,15 @@ Capture d'écran des couleurs
Une image parfaitement normale
+ +

+ This here demonstrates what happens when translated links exist but they're + actually broken. And in this case, what should happen is that it can fall back + on the en-US equivalent of those URLs. +

+