-
Notifications
You must be signed in to change notification settings - Fork 510
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(cloud-function): redirect non-canonical URLs (#11151)
Previously, we served the same content regardless of capitalization: - https://developer.mozilla.org/en-US/docs/web (incorrect capitalization) - https://developer.mozilla.org/en-US/docs/Web (correct capitalization) Now, we build a map to match the requested URL against the canonical URL, redirecting if necessary.
- Loading branch information
Showing
13 changed files
with
190 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,5 +15,6 @@ | |
.gitignore | ||
|
||
#!include:.gitignore | ||
!canonicals.json | ||
!redirects.json | ||
!src/internal/** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ node_modules | |
.env* | ||
!.env-dist | ||
*.log | ||
canonicals.json | ||
redirects.json | ||
src/**/*.js | ||
src/internal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import { readFile, stat, writeFile } from "node:fs/promises"; | ||
import { dirname, join } from "node:path"; | ||
import { fileURLToPath } from "node:url"; | ||
|
||
import * as dotenv from "dotenv"; | ||
|
||
import { normalizePath } from "./utils.js"; | ||
|
||
// ES modules do not provide `__dirname`; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Two directory levels above this file (presumably the repository root — confirm).
const root = join(__dirname, "..", "..");

// Load environment variables from the file named by ENV_FILE (resolved
// against `root`), falling back to ".env" when ENV_FILE is unset or empty.
const envFileName = process.env["ENV_FILE"] || ".env";
dotenv.config({ path: join(root, envFileName) });
|
||
/**
 * Builds `canonicals.json`, a JSON object keyed by the normalized form of each
 * page path and holding the path exactly as it is listed in the sitemap.
 *
 * The sitemap is read from `sitemap.txt` inside `BUILD_OUT_ROOT` (default:
 * `<root>/client/build`). Only lines that start with "/" are treated as pages.
 * The output file name is relative, so it is resolved against the current
 * working directory.
 */
async function buildCanonicals() {
  const buildOutRoot =
    process.env["BUILD_OUT_ROOT"] ?? join(root, "client", "build");
  const sitemapPath = join(buildOutRoot, "sitemap.txt");

  const sitemapText = await readFile(sitemapPath, "utf-8");

  // Single pass: skip non-page lines, record normalized -> listed path.
  // A later line with the same normalized key overwrites an earlier one.
  const siteMap: Record<string, string> = {};
  let pageCount = 0;
  for (const entry of sitemapText.split("\n")) {
    if (!entry.startsWith("/")) {
      continue;
    }
    siteMap[normalizePath(entry)] = entry;
    pageCount += 1;
  }
  console.log(`- ${sitemapPath}: ${pageCount} pages`);

  const output = "canonicals.json";
  await writeFile(output, JSON.stringify(siteMap));

  // Report the number of distinct keys and the on-disk size of the result.
  const { size } = await stat(output);
  const count = Object.keys(siteMap).length;
  const kb = Math.round(size / 1024);
  console.log(`Wrote ${count} pages in ${kb} KB.`);
}

await buildCanonicals();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import { createRequire } from "node:module"; | ||
|
||
import { NextFunction, Request, Response } from "express"; | ||
|
||
import { THIRTY_DAYS } from "../constants.js"; | ||
import { normalizePath, redirect } from "../utils.js"; | ||
|
||
// ES modules have no built-in `require`; create one bound to this module so
// the JSON file below can be loaded with a plain `require` call.
const require = createRequire(import.meta.url);

// Lookup table read from canonicals.json; indexed by a normalized path.
// NOTE(review): values are presumably the canonical paths — confirm against
// the script that generates canonicals.json.
const REDIRECTS = require("../../canonicals.json");

// Path suffixes that are split off before the lookup and re-appended to the
// redirect target. They are tried in this order; the empty suffix matches
// every path and is therefore last.
const REDIRECT_SUFFIXES = [
  "/index.json",
  "/bcd.json",
  "",
];
|
||
/**
 * Express middleware that redirects a request for a non-canonical spelling of
 * a known path (for example, wrong capitalization) to its canonical form.
 *
 * The request path is checked against each entry of `REDIRECT_SUFFIXES`. The
 * part before a matching suffix is normalized with `normalizePath` and looked
 * up in `REDIRECTS`. If the lookup yields a string that differs from the
 * requested path, a 301 redirect is issued to that string with the suffix and
 * the original query string re-appended. Otherwise the request is handed on
 * via `next()`.
 *
 * @param req - Incoming request; its `url`, `protocol` and `Host` header are read.
 * @param res - Response passed to `redirect` when a redirect is issued.
 * @param next - Invoked when no redirect applies or the URL cannot be parsed.
 */
export async function redirectNonCanonicals(
  req: Request,
  res: Response,
  next: NextFunction
) {
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(req.url, `${req.protocol}://${req.headers.host}/`);
  } catch {
    // `new URL()` throws on input it cannot parse (e.g. a malformed Host
    // header). Letting that escape would reject the promise returned by this
    // async handler without ever calling `next`. There is nothing to
    // canonicalize in that case, so pass the request on unchanged.
    next();
    return;
  }
  const { pathname } = parsedUrl;

  // Redirect to canonical version.
  // Example:
  // - Source: /en-US/docs/web/guide/ajax/getting_started
  // - Target: /en-US/docs/Web/Guide/AJAX/Getting_Started
  for (const suffix of REDIRECT_SUFFIXES) {
    if (!pathname.endsWith(suffix)) {
      continue;
    }
    const originalSource = pathname.substring(
      0,
      pathname.length - suffix.length
    );
    const source = normalizePath(originalSource);

    // The `typeof` check also rejects non-string members inherited from
    // Object.prototype (e.g. a lookup key of "constructor").
    const canonical: unknown = REDIRECTS[source];
    if (typeof canonical === "string" && canonical !== originalSource) {
      const target = canonical + suffix + parsedUrl.search;
      return redirect(res, target, {
        status: 301,
        cacheControlSeconds: THIRTY_DAYS,
      });
    }
  }

  next();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters