Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cloud-function): redirect non-canonical URLs #11151

Merged
merged 22 commits into from
Jun 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
1c76989
feat(cloud-function): redirect capitalization
caugner May 17, 2024
f614bcb
Revert "feat(cloud-function): redirect capitalization"
caugner Jun 7, 2024
5f52fe9
Merge branch 'main' into MP-1111-redirect-capitalization
caugner Jun 7, 2024
e09c4f0
feat(cloud-function): redirect capitalization (v2)
caugner Jun 7, 2024
0957e23
refactor(cloud-function): rename redirect{Capitalization => NonCanoni…
caugner Jun 7, 2024
af9cebd
fix(cloud-function): parse url with URL() + support query strings
caugner Jun 7, 2024
fe94ad5
fix(cloud-function): compare target with original source
caugner Jun 7, 2024
b58010d
fixup! feat(cloud-function): redirect capitalization (v2)
caugner Jun 7, 2024
038740d
chore(cloud-function): reuse normalizeSlug()
caugner Jun 11, 2024
237126d
refactor(cloud-function): rename normalize{Slug => Path}()
caugner Jun 11, 2024
d2e58ca
refactor(cloud-function): use object for sitemap
caugner Jun 11, 2024
c9a9474
refactor(cloud-function): rename {sitemap => canonicals}
caugner Jun 11, 2024
7b7876b
refactor(build): extract makeSitemapIndexTXT
caugner Jun 11, 2024
1ece8e6
fix(build): distinguish sitemap.{txt,xml.gz}
caugner Jun 11, 2024
687e1e7
refactor(build): extract buildSitemapIndex{TXT,XML}
caugner Jun 11, 2024
ae6a9a9
Merge branch 'main' into MP-1111-redirect-capitalization
caugner Jun 11, 2024
b9bd613
Merge branch 'main' into MP-1111-redirect-capitalization
caugner Jun 12, 2024
285ef56
Merge branch 'main' into MP-1111-redirect-capitalization
caugner Jun 19, 2024
d209673
Merge branch 'main' into MP-1111-redirect-capitalization
caugner Jun 21, 2024
55d08d3
chore(build): remove unused fdir import
caugner Jun 21, 2024
353c553
chore(cloud-function): move canonical redirect down + search-index ro…
caugner Jun 21, 2024
4550113
chore(cloud-function): add comments
caugner Jun 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/prod-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ jobs:
run: |-
npm ci
npm run build-redirects
npm run build-canonicals
- name: Deploy Function
if: ${{ ! vars.SKIP_FUNCTION }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/stage-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ jobs:
run: |
npm ci
npm run build-redirects
npm run build-canonicals
- name: Deploy Function
if: ${{ ! vars.SKIP_FUNCTION }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/xyz-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ jobs:
run: |
npm ci
npm run build-redirects
npm run build-canonicals
- name: Deploy Function
if: ${{ ! vars.SKIP_FUNCTION }}
Expand Down
28 changes: 9 additions & 19 deletions build/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@ import {
} from "./index.js";
import { Doc, DocMetadata, Flaws } from "../libs/types/document.js";
import SearchIndex from "./search-index.js";
import { makeSitemapIndexXML, buildSitemap } from "./sitemaps.js";
import { buildSitemapIndex, buildSitemap } from "./sitemaps.js";
import { humanFileSize } from "./utils.js";
import { initSentry } from "./sentry.js";
import { macroRenderTimes } from "../kumascript/src/render.js";
import { fdir } from "fdir";
import { ssrDocument } from "./ssr.js";
import { HydrationData } from "../libs/types/hydration.js";

Expand Down Expand Up @@ -515,25 +514,16 @@ program
if (!options.quiet) {
console.log(chalk.yellow("Building sitemap index file..."));
}
const sitemapsBuilt = new fdir()
.filter((p) => p.endsWith("/sitemap.xml.gz"))
.withFullPaths()
.crawl(path.join(BUILD_OUT_ROOT, "sitemaps"))
.sync()
.sort()
.map((fp) => fp.replace(BUILD_OUT_ROOT, ""));
const sitemapIndexFilePath = path.join(BUILD_OUT_ROOT, "sitemap.xml");
fs.writeFileSync(
sitemapIndexFilePath,
makeSitemapIndexXML(sitemapsBuilt)
);
const sitemapsBuilt = await buildSitemapIndex();

if (!options.quiet) {
console.log(
chalk.green(
`Wrote sitemap index referencing ${sitemapsBuilt.length} sitemaps:\n${sitemapsBuilt.map((s) => `- ${s}`).join("\n")}`
)
);
for (const sitemaps of sitemapsBuilt) {
console.log(
chalk.green(
`Wrote sitemap index referencing ${sitemaps.length} sitemaps:\n${sitemaps.map((s) => `- ${s}`).join("\n")}`
)
);
}
}
return;
}
Expand Down
76 changes: 63 additions & 13 deletions build/sitemaps.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import { join } from "node:path";
import { mkdir, writeFile } from "node:fs/promises";
import { mkdir, readFile, writeFile } from "node:fs/promises";
import { gzipSync } from "node:zlib";

import { fdir } from "fdir";

import { BASE_URL, BUILD_OUT_ROOT } from "../libs/env/index.js";

const SITEMAP_BASE_URL = BASE_URL.replace(/\/$/, "");
Expand All @@ -18,10 +20,61 @@ export async function buildSitemap(
pathSuffix = [],
}: { slugPrefix?: string; pathSuffix?: string[] }
) {
const txt = entries.map(({ slug }) => `${slugPrefix}${slug}`).join("\n");
const xml = makeSitemapXML(slugPrefix, entries);
const path = await writeSitemap(xml, ...pathSuffix);

return path;
const dirPath = join(
BUILD_OUT_ROOT,
"sitemaps",
...pathSuffix.map((p) => p.toLowerCase())
);
await mkdir(dirPath, { recursive: true });

const txtPath = join(dirPath, "sitemap.txt");
const xmlPath = join(dirPath, "sitemap.xml.gz");

await Promise.all([
writeFile(txtPath, txt, "utf-8"),
writeFile(xmlPath, gzipSync(xml)),
]);

return xmlPath;
}

export async function buildSitemapIndex() {
return Promise.all([buildSitemapIndexTXT(), buildSitemapIndexXML()]);
}

export async function buildSitemapIndexTXT() {
const sitemaps = new fdir()
.filter((p) => p.endsWith("/sitemap.txt"))
.withFullPaths()
.crawl(join(BUILD_OUT_ROOT, "sitemaps"))
.sync();

const file = join(BUILD_OUT_ROOT, "sitemap.txt");

const content = await makeSitemapIndexTXT(sitemaps);

await writeFile(file, content, "utf-8");

return sitemaps.sort().map((fp) => fp.replace(BUILD_OUT_ROOT, ""));
}

export async function buildSitemapIndexXML() {
const sitemaps = new fdir()
.filter((p) => p.endsWith("/sitemap.xml.gz"))
.withFullPaths()
.crawl(join(BUILD_OUT_ROOT, "sitemaps"))
.sync()
.sort()
.map((fp) => fp.replace(BUILD_OUT_ROOT, ""));

const file = join(BUILD_OUT_ROOT, "sitemap.xml");

await writeFile(file, makeSitemapIndexXML(sitemaps));

return sitemaps.sort().map((fp) => fp.replace(BUILD_OUT_ROOT, ""));
}

function makeSitemapXML(prefix: string, docs: SitemapEntry[]) {
Expand Down Expand Up @@ -62,16 +115,13 @@ export function makeSitemapIndexXML(paths: string[]) {
].join("\n");
}

async function writeSitemap(xml: string, ...paths: string[]) {
const dirPath = join(
BUILD_OUT_ROOT,
"sitemaps",
...paths.map((p) => p.toLowerCase())
);
await mkdir(dirPath, { recursive: true });
/**
* Creates a global text sitemap by merging all text sitemaps.
*/
export async function makeSitemapIndexTXT(paths: string[]) {
const maps = await Promise.all(paths.map((p) => readFile(p, "utf-8")));

const filePath = join(dirPath, "sitemap.xml.gz");
await writeFile(filePath, gzipSync(xml));
const urls = maps.join("\n").split("\n").filter(Boolean);

return filePath;
return urls.sort().join("\n");
}
5 changes: 4 additions & 1 deletion build/spas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,10 @@ export async function buildSPAs(options: {
continue;
}
for (const pathLocale of fs.readdirSync(root)) {
if (!fs.statSync(path.join(root, pathLocale)).isDirectory()) {
if (
!fs.statSync(path.join(root, pathLocale)).isDirectory() ||
!isValidLocale(pathLocale)
) {
continue;
}

Expand Down
1 change: 1 addition & 0 deletions cloud-function/.gcloudignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@
.gitignore

#!include:.gitignore
!canonicals.json
!redirects.json
!src/internal/**
1 change: 1 addition & 0 deletions cloud-function/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ node_modules
.env*
!.env-dist
*.log
canonicals.json
redirects.json
src/**/*.js
src/internal
1 change: 1 addition & 0 deletions cloud-function/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"main": "src/index.js",
"scripts": {
"build": "tsc -b",
"build-canonicals": "cross-env NODE_OPTIONS='--no-warnings=ExperimentalWarning --loader ts-node/esm' node src/build-canonicals.ts",
"build-redirects": "cross-env NODE_OPTIONS='--no-warnings=ExperimentalWarning --loader ts-node/esm' node src/build-redirects.ts",
"copy-internal": "rm -rf ./src/internal && cp -R ../libs ./src/internal",
"gcp-build": "npm run build",
Expand Down
24 changes: 18 additions & 6 deletions cloud-function/src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { handleStripePlans } from "./handlers/handle-stripe-plans.js";
import { proxyTelemetry } from "./handlers/proxy-telemetry.js";
import { lowercasePathname } from "./middlewares/lowercase-pathname.js";
import { resolveIndexHTML } from "./middlewares/resolve-index-html.js";
import { redirectNonCanonicals } from "./middlewares/redirect-non-canonicals.js";
import { redirectLeadingSlash } from "./middlewares/redirect-leading-slash.js";
import { redirectMovedPages } from "./middlewares/redirect-moved-pages.js";
import { redirectEnforceTrailingSlash } from "./middlewares/redirect-enforce-trailing-slash.js";
Expand All @@ -26,43 +27,59 @@ import { proxyPong } from "./handlers/proxy-pong.js";
const router = Router();
router.use(stripForwardedHostHeaders);
router.use(redirectLeadingSlash);
// MDN Plus plans.
router.all(
"/api/v1/stripe/plans",
requireOrigin(Origin.main),
handleStripePlans
);
// Backend.
router.all(
["/api/*", "/admin-api/*", "/events/fxa", "/users/fxa/*"],
requireOrigin(Origin.main),
proxyApi
);
// Telemetry.
router.all("/submit/mdn-yari/*", requireOrigin(Origin.main), proxyTelemetry);
router.all("/pong/*", requireOrigin(Origin.main), express.json(), proxyPong);
router.all("/pimg/*", requireOrigin(Origin.main), proxyPong);
// Playground.
router.get(
["/[^/]+/docs/*/runner.html", "/[^/]+/blog/*/runner.html", "/runner.html"],
requireOrigin(Origin.play),
resolveRunnerHtml,
proxyRunner
);
// Assets.
router.get(
["/assets/*", "/sitemaps/*", "/static/*", "/[^/]+.[^/]+"],
requireOrigin(Origin.main),
proxyContent
);
router.get(
"/[^/]+/search-index.json",
requireOrigin(Origin.main),
lowercasePathname,
proxyContent
);
// Root.
router.get("/", requireOrigin(Origin.main), redirectLocale);
// Live samples.
router.get(
["/[^/]+/docs/*/_sample_.*.html", "/[^/]+/blog/*/_sample_.*.html"],
requireOrigin(Origin.liveSamples),
resolveIndexHTML,
proxyContent
);
// Attachments.
router.get(
`/[^/]+/docs/*/*.(${ANY_ATTACHMENT_EXT.join("|")})`,
requireOrigin(Origin.main, Origin.liveSamples, Origin.play),
resolveIndexHTML,
proxyContent
);
// Pages.
router.use(redirectNonCanonicals);
router.get(
"/[^/]+/docs/*",
requireOrigin(Origin.main),
Expand All @@ -81,12 +98,7 @@ router.get(
resolveIndexHTML,
proxyContent
);
router.get(
"/[^/]+/search-index.json",
requireOrigin(Origin.main),
lowercasePathname,
proxyContent
);
// MDN Plus, static pages, etc.
router.get(
"*",
requireOrigin(Origin.main),
Expand Down
40 changes: 40 additions & 0 deletions cloud-function/src/build-canonicals.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { readFile, stat, writeFile } from "node:fs/promises";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";

import * as dotenv from "dotenv";

import { normalizePath } from "./utils.js";

const __dirname = dirname(fileURLToPath(import.meta.url));

const root = join(__dirname, "..", "..");
dotenv.config({
path: join(root, process.env["ENV_FILE"] || ".env"),
});

async function buildCanonicals() {
const { BUILD_OUT_ROOT = join(root, "client", "build") } = process.env;

const sitemapPath = join(BUILD_OUT_ROOT, "sitemap.txt");

const content = await readFile(sitemapPath, "utf-8");
const lines = content.split("\n");
const pages = lines.filter((line) => line.startsWith("/"));

const siteMap: Record<string, string> = {};
for (const page of pages) {
siteMap[normalizePath(page)] = page;
}
console.log(`- ${sitemapPath}: ${pages.length} pages`);

const output = "canonicals.json";

await writeFile(output, JSON.stringify(siteMap));

const count = Object.keys(siteMap).length;
const kb = Math.round((await stat(output)).size / 1024);
console.log(`Wrote ${count} pages in ${kb} KB.`);
}

await buildCanonicals();
46 changes: 46 additions & 0 deletions cloud-function/src/middlewares/redirect-non-canonicals.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { createRequire } from "node:module";

import { NextFunction, Request, Response } from "express";

import { THIRTY_DAYS } from "../constants.js";
import { normalizePath, redirect } from "../utils.js";

const require = createRequire(import.meta.url);
const REDIRECTS = require("../../canonicals.json");
const REDIRECT_SUFFIXES = ["/index.json", "/bcd.json", ""];

export async function redirectNonCanonicals(
req: Request,
res: Response,
next: NextFunction
) {
const parsedUrl = new URL(req.url, `${req.protocol}://${req.headers.host}/`);
const { pathname } = parsedUrl;

// Redirect to canonical version.
// Example:
// - Source: /en-US/docs/web/guide/ajax/getting_started
// - Target: /en-US/docs/Web/Guide/AJAX/Getting_Started
for (const suffix of REDIRECT_SUFFIXES) {
if (!pathname.endsWith(suffix)) {
continue;
}
const originalSource = pathname.substring(
0,
pathname.length - suffix.length
);
const source = normalizePath(originalSource);
if (
typeof REDIRECTS[source] == "string" &&
REDIRECTS[source] !== originalSource
) {
const target = REDIRECTS[source] + suffix + parsedUrl.search;
return redirect(res, target, {
status: 301,
cacheControlSeconds: THIRTY_DAYS,
});
}
}

next();
}
4 changes: 4 additions & 0 deletions cloud-function/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,7 @@ const ANY_ATTACHMENT_REGEXP = createRegExpFromExtensions(
export function isAsset(url: string) {
return ANY_ATTACHMENT_REGEXP.test(url);
}

export function normalizePath(path: string) {
return path.toLowerCase().replace(/\/$/, "");
}
Loading