Skip to content

Commit

Permalink
Broken link checkers should fallback to en-US if possible (#3254)
Browse files Browse the repository at this point in the history
* Broken link checkers should fallback nicely to English if need be

Fixes #3104

* remove debugging

* adding tests
  • Loading branch information
peterbe authored Mar 16, 2021
1 parent 98788b8 commit f72618b
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 10 deletions.
73 changes: 64 additions & 9 deletions build/flaws.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const {
} = require("./matches-in-text");
const { humanFileSize } = require("./utils");
const { VALID_MIME_TYPES } = require("../filecheck/constants");
const { DEFAULT_LOCALE } = require("../libs/constants");

function injectFlaws(doc, $, options, document) {
if (doc.isArchive) return;
Expand Down Expand Up @@ -204,21 +205,37 @@ function injectBrokenLinksFlaws(doc, $, { rawContent }, level) {
// us from calling `findMatchesInText()` more than once.
const matches = new Map();

function mutateLink($element, suggestion, enUSFallback) {
if (suggestion) {
$element.attr("href", suggestion);
} else if (enUSFallback) {
$element.attr("href", enUSFallback);
// This functionality here should match what we do inside
// the `web.smartLink()` function in kumascript rendering.
$element.text(`${$element.text()} (${DEFAULT_LOCALE})`);
$element.addClass("only-in-en-us");
$element.attr("title", "Currently only available in English (US)");
} else {
throw new Error("Don't use this function if neither is true");
}
}

// A closure function to help making it easier to append flaws
function addBrokenLink(
$element,
index,
href,
suggestion = null,
explanation
explanation = null,
enUSFallback = null
) {
if (level === FLAW_LEVELS.IGNORE) {
// Note, even if not interested in flaws, we still need to apply the
// suggestion. For example, in production builds, we don't care about
// logging flaws, but because not all `broken_links` flaws have been
// manually fixed at the source.
if (suggestion) {
$element.attr("href", suggestion);
if (suggestion || enUSFallback) {
mutateLink($element, suggestion, enUSFallback);
}
return;
}
Expand All @@ -245,10 +262,9 @@ function injectBrokenLinksFlaws(doc, $, { rawContent }, level) {
doc.flaws.broken_links = [];
}
const id = `link${doc.flaws.broken_links.length + 1}`;
let fixable = false;
if (suggestion) {
$element.attr("href", suggestion);
fixable = true;
const fixable = !!suggestion;
if (suggestion || enUSFallback) {
mutateLink($element, suggestion, enUSFallback);
}
$element.attr("data-flaw", id);
doc.flaws.broken_links.push(
Expand Down Expand Up @@ -297,7 +313,8 @@ function injectBrokenLinksFlaws(doc, $, { rawContent }, level) {
!Image.findByURL(hrefNormalized) &&
!Archive.isArchivedURL(hrefNormalized)
) {
// Before we give up, check if it's a redirect.
// Even if it's a redirect, it's still a flaw, but it'll be nice to
// know what it *should* be.
const resolved = Redirect.resolve(hrefNormalized);
if (resolved !== hrefNormalized) {
addBrokenLink(
Expand All @@ -307,7 +324,45 @@ function injectBrokenLinksFlaws(doc, $, { rawContent }, level) {
resolved + absoluteURL.search + absoluteURL.hash.toLowerCase()
);
} else {
addBrokenLink(a, checked.get(href), href);
let enUSFallbackURL = null;
// Test if the document is a translated document and the link isn't
// to an en-US URL. We know the link is broken (in this locale!)
// but it might be "salvageable" if we link the en-US equivalent.
// This is, by the way, the same trick the `web.smartLink()` utility
// function does in kumascript rendering.
if (
doc.locale !== DEFAULT_LOCALE &&
href.startsWith(`/${doc.locale}/`)
) {
// What if you swich to the English link; would th link work
// better then?
const enUSHrefNormalized = hrefNormalized.replace(
`/${doc.locale}/`,
`/${DEFAULT_LOCALE}/`
);
let enUSFound = Document.findByURL(enUSHrefNormalized);
if (enUSFound) {
enUSFallbackURL = enUSFound.url;
} else {
const enUSResolved = Redirect.resolve(enUSHrefNormalized);
if (enUSResolved !== enUSHrefNormalized) {
enUSFallbackURL =
enUSResolved +
absoluteURL.search +
absoluteURL.hash.toLowerCase();
}
}
}
addBrokenLink(
a,
checked.get(href),
href,
null,
enUSFallbackURL
? "Can use the English (en-US) link as a fallback"
: null,
enUSFallbackURL
);
}
}
// But does it have the correct case?!
Expand Down
2 changes: 1 addition & 1 deletion kumascript/src/api/web.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ module.exports = {
flawAttribute = ` data-flaw-src="${util.htmlEscape(flaw.macroSource)}"`;
return (
'<a class="only-in-en-us" ' +
'title="Currently only available in English" ' +
'title="Currently only available in English (US)" ' +
`href="${enUSPage.url}"${flawAttribute}>${content} <span>(en-US)</span></a>`
);
}
Expand Down
26 changes: 26 additions & 0 deletions testing/tests/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1299,3 +1299,29 @@ test("unsafe HTML gets flagged as flaws and replace with its raw HTML", () => {
const $ = cheerio.load(html);
expect($("code.unsafe-html").length).toBe(6);
});

test("translated content broken links can fall back to en-us", () => {
const builtFolder = path.join(buildRoot, "fr", "docs", "web", "foo");
const jsonFile = path.join(builtFolder, "index.json");

// We should be able to read it and expect certain values
const { doc } = JSON.parse(fs.readFileSync(jsonFile));
const map = new Map(doc.flaws.broken_links.map((x) => [x.href, x]));
expect(map.get("/fr/docs/Web/CSS/dumber").explanation).toBe(
"Can use the English (en-US) link as a fallback"
);
expect(map.get("/fr/docs/Web/CSS/number").explanation).toBe(
"Can use the English (en-US) link as a fallback"
);

const htmlFile = path.join(builtFolder, "index.html");
const html = fs.readFileSync(htmlFile, "utf-8");
const $ = cheerio.load(html);
expect($('article a[href="/fr/docs/Web/CSS/dumber"]').length).toBe(0);
expect($('article a[href="/fr/docs/Web/CSS/number"]').length).toBe(0);
expect($('article a[href="/en-US/docs/Web/CSS/number"]').length).toBe(2);
expect($("article a.only-in-en-us").length).toBe(2);
expect($("article a.only-in-en-us").attr("title")).toBe(
"Currently only available in English (US)"
);
});
12 changes: 12 additions & 0 deletions testing/translated-content/files/fr/web/foo/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,15 @@
<img src="screenshot.png" alt="Capture d'écran des couleurs" />
<figcaption>Une image parfaitement normale</figcaption>
</figure>

<p>
This here demonstrates what happens when translated links exist but they're
actually broken. And in this case, what should happen is that it can fall back
on the en-US equivalent of those URLs.
</p>
<ul>
<li><a href="/fr/docs/Web/CSS/dumber">The &quot;dumber&quot; page</a></li>
<li>
<a href="/fr/docs/Web/CSS/number">The <i>number</i> page</a>
</li>
</ul>

0 comments on commit f72618b

Please sign in to comment.