Skip to content

Commit

Permalink
support home and search pages in S3 (#2922)
Browse files Browse the repository at this point in the history
* support home and search pages in S3

* feedbacked

* ensure redirect includes query string

* more redirect cases and tests

* feedback and fixes

* Update deployer/aws-lambda/content-origin-request/index.js

Co-authored-by: Peter Bengtsson <peterbe@mozilla.com>

Co-authored-by: Peter Bengtsson <peterbe@mozilla.com>
  • Loading branch information
escattone and Peter Bengtsson authored Feb 26, 2021
1 parent 1eae33f commit 62ec395
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 27 deletions.
91 changes: 80 additions & 11 deletions deployer/aws-lambda/content-origin-request/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,30 @@ const {
encodePath,
slugToFolder,
} = require("@yari-internal/slug-utils");
const { VALID_LOCALES } = require("@yari-internal/constants");

const THIRTY_DAYS = 3600 * 24 * 30;
const NEEDS_LOCALE = /^\/(?:docs|search|settings|signin|signup)(?:$|\/)/;
// Note that the keys of "VALID_LOCALES" are lowercase locales.
const LOCALE_URI_WITHOUT_TRAILING_SLASH = new Set(
[...VALID_LOCALES.keys()].map((locale) => `/${locale}`)
);
const LOCALE_URI_WITH_TRAILING_SLASH = new Set(
[...VALID_LOCALES.keys()].map((locale) => `/${locale}/`)
);
// TODO: The code that uses LEGACY_URI_NEEDING_TRAILING_SLASH should be
// temporary. For example, when we have moved to the Yari-built
// account settings page, we should add fundamental redirects
// for "/{locale}/account/?" and "/account/?" that redirect to
// "/{locale}/settings" and "/settings" respectively. The other
// cases can be either redirected or deleted eventually as well.
// The goal is to eventually remove the code that uses
// LEGACY_URI_NEEDING_TRAILING_SLASH.
const LEGACY_URI_NEEDING_TRAILING_SLASH = new RegExp(
`^(?:${[...LOCALE_URI_WITHOUT_TRAILING_SLASH].join(
"|"
)})?/(?:account|contribute|maintenance-mode|payments)/?$`
);

const CONTENT_DEVELOPMENT_DOMAIN = ".content.dev.mdn.mozit.cloud";

Expand Down Expand Up @@ -50,43 +74,88 @@ exports.handler = async (event) => {
* Modify the request before it's passed to the S3 origin.
*/
const request = event.Records[0].cf.request;
const requestURILowerCase = request.uri.toLowerCase();
const host = request.headers.host[0].value.toLowerCase();
const qs = request.querystring ? `?${request.querystring}` : "";

const { url, status } = resolveFundamental(request.uri);
if (url) {
// TODO: Do we want to add the query string to the redirect?
// If we decide we do, then we probably need to change
// the caching policy on the "*/docs/* behavior" to
// cache based on the query strings as well.
return redirect(url, {
status,
cacheControlSeconds: 3600 * 24 * 30,
cacheControlSeconds: THIRTY_DAYS,
});
}

// Starting with /docs/ or empty path (/) should redirect to a locale.
// Also trim a trailing slash to avoid a double redirect.
// Do we need to insert the locale? If we do, trim a trailing slash
// to avoid a double redirect, except when requesting the home page.
if (
request.uri.startsWith("/docs/") ||
request.uri === "" ||
request.uri === "/" ||
request.uri === ""
NEEDS_LOCALE.test(requestURILowerCase)
) {
const path = request.uri.endsWith("/")
? request.uri.slice(0, -1)
: request.uri;
const locale = getLocale(request);
// The only time we actually want a trailing slash is when the URL is just
// the locale. E.g. `/en-US/` (not `/en-US`)
return redirect(`/${locale}${path || "/"}`);
return redirect(`/${locale}${path || "/"}` + qs);
}

// A document URL with a trailing slash should redirect
// to the same URL without the trailing slash.
// At this point, the URI is guaranteed to start with a forward slash.
const uriParts = request.uri.split("/");
const uriFirstPart = uriParts[1];
const uriFirstPartLC = uriFirstPart.toLowerCase();

// Do we need to redirect to the properly-cased locale? We also ensure
// here that requests for the home page have a trailing slash, while
// all others do not.
if (
VALID_LOCALES.has(uriFirstPartLC) &&
uriFirstPart !== VALID_LOCALES.get(uriFirstPartLC)
) {
// Assemble the rest of the path without a trailing slash.
const extra = uriParts.slice(2).filter(Boolean).join("/");
return redirect(`/${VALID_LOCALES.get(uriFirstPartLC)}/${extra}${qs}`);
}

// Handle cases related to the presence or absence of a trailing-slash.
if (LOCALE_URI_WITHOUT_TRAILING_SLASH.has(requestURILowerCase)) {
// Home page requests are the special case on MDN. They should
// always have a trailing slash. So a home page URL without a
// trailing slash should redirect to the same URL with a
// trailing slash. When the redirected home-page request is
// processed by this Lambda function, note that we'll remove
// the trailing slash before the request reaches S3 (see below).
return redirect(request.uri + "/" + qs, {
status: 301,
cacheControlSeconds: THIRTY_DAYS,
});
} else if (LOCALE_URI_WITH_TRAILING_SLASH.has(requestURILowerCase)) {
// We've received a proper request for a locale's home page (i.e.,
// it has a traling slash), but since that request will be served
// from S3, we need to strip the trailing slash before it reaches
// S3. This is required because we store the home pages in S3 as
// their path name itself, for example "en-us" for the English home
// page, not "en-us/index.html", which is what S3 would look for if
// we left the trailing slash.
request.uri = request.uri.slice(0, -1);
} else if (
request.uri.endsWith("/") &&
request.uri.toLowerCase().includes("/docs/")
!LEGACY_URI_NEEDING_TRAILING_SLASH.test(requestURILowerCase)
) {
return redirect(request.uri.slice(0, -1), {
// All other requests with a trailing slash should redirect to the
// same URL without the trailing slash.
return redirect(request.uri.slice(0, -1) + qs, {
status: 301,
cacheControlSeconds: 3600 * 24 * 30,
cacheControlSeconds: THIRTY_DAYS,
});
}

// This condition exists to accommodate AWS origin-groups, which
// include two origins, the primary and the secondary, where the
// secondary origin is only attempted if the primary fails. Since
Expand Down
24 changes: 23 additions & 1 deletion testing/integration/headless/map_301.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,9 @@
url_test("/patches", status_code=404),
url_test("/patches/foo", status_code=404),
url_test("/web-tech", status_code=404),
url_test("/web-tech/feed/atom/", status_code=404),
url_test(
"/web-tech/feed/atom/", follow_redirects=True, final_status_code=404
),
url_test("/css/wiki.css", follow_redirects=True, final_status_code=404),
url_test("/css/base.css", follow_redirects=True, final_status_code=404),
url_test("/contests", "http://www.mozillalabs.com/", status_code=302),
Expand Down Expand Up @@ -1033,4 +1035,24 @@
url_test("/en-US/XMLHttpRequest", "/en-US/docs/XMLHttpRequest"),
url_test("/en-US/XMLHttpRequest/", "/en-US/docs/XMLHttpRequest"),
url_test("/en-US/XMLHttpRequest/FormData/", "/en-US/docs/XMLHttpRequest/FormData"),
# Add trailing slash for the home page.
url_test("/en-US", "/en-US/"),
# Some special cases for "/docs".
url_test("/docs", "/docs/Web"),
url_test("/docs/", "/docs/Web"),
url_test("/en-us/docs", "/en-us/docs/Web"),
url_test("/en-us/docs/", "/en-us/docs/Web"),
# Locale and trailing-slash correction redirect tests.
url_test("/EN-us", "/en-US/", status_code=302),
url_test("/EN-us?next=FOO", "/en-US/?next=FOO", status_code=302),
url_test("/EN-US/", "/en-US/", status_code=302),
url_test("/EN-US/?next=FOO", "/en-US/?next=FOO", status_code=302),
url_test("/eN-us/docs/Web", "/en-US/docs/Web", status_code=302),
url_test("/eN-us/docs/Web/", "/en-US/docs/Web", status_code=302),
url_test("/eN-us/docs/Web?next=FOO", "/en-US/docs/Web?next=FOO", status_code=302),
url_test("/eN-us/docs/Web/?next=FOO", "/en-US/docs/Web?next=FOO", status_code=302),
url_test("/en-uS/search", "/en-US/search", status_code=302),
url_test("/en-uS/search/", "/en-US/search", status_code=302),
url_test("/en-Us/search?q=video", "/en-US/search?q=video", status_code=302),
url_test("/en-Us/search/?q=video", "/en-US/search?q=video", status_code=302),
]
4 changes: 2 additions & 2 deletions testing/integration/headless/test_cdn.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ def assert_cached(
("/readiness", 204, None),
("/api/v1/whoami", 200, None),
("/api/v1/search/en-US?q=css", 200, None),
("/en-US/search?q=css", 200, None),
("/csp-violation-capture", 405, None),
("/en-US/profile", 302, "/en-US/users/signin?next=/en-US/profile"),
("/en-US/profile/edit", 302, "/en-US/users/signin?next=/en-US/profile/edit"),
Expand Down Expand Up @@ -206,7 +205,8 @@ def test_not_cached(base_url, is_behind_cdn, slug, status, expected_location):
("/presentations/microsummaries/index.html", 200, None),
("/en-US/account/", 200, None),
("/en-US/search/xml", 200, None),
("/en-US/search.json?q=yada", 301, "/api/v1/search/en-US?q=yada"),
("/en-US/search.json?q=yada", 301, "/api/v1/search?q=yada"),
("/en-US/search?q=css", 200, None),
("/en-US/search/?q=css", 301, "/en-US/search?q=css"),
("/en-US/search/?q=html", 301, "/en-US/search?q=html"),
("/en-US/Firefox", 302, "/en-US/docs/Mozilla/Firefox"),
Expand Down
26 changes: 16 additions & 10 deletions testing/integration/headless/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_hreflang_basic(base_url):
resp = request("get", url)
assert resp.status_code == 200
html = PyQuery(resp.text)
assert html.attr("lang") == "en"
assert html.attr("lang") == "en-US"
assert html.find(
'head > link[hreflang="en"][href="https://developer.mozilla.org/en-US/docs/Web/HTTP"]'
)
Expand Down Expand Up @@ -107,12 +107,12 @@ def test_api_basic(base_url, uri, expected_keys):
"en-US-2": ("en-US", None, "en-US"),
"en-US-3": ("en-US", "en-US", None),
"en-US-4": ("en-US", "en-US", "fr"),
"es-1": ("es", "es", None),
"es-2": ("es", "es", "en-US"),
"es-3": ("es", None, "es"),
"de-1": ("de", "de", None),
"de-2": ("de", "de", "en-US"),
"de-3": ("de", None, "de"),
"fr-1": ("fr", "fr", None),
"fr-2": ("fr", "fr", "en-US"),
"fr-3": ("fr", None, "fr"),
"ja-1": ("ja", "ja", None),
"ja-2": ("ja", "ja", "en-US"),
"ja-3": ("ja", None, "ja"),
}


Expand All @@ -124,17 +124,20 @@ def test_api_basic(base_url, uri, expected_keys):
@pytest.mark.parametrize(
"slug",
[
"",
"/",
"/docs/Web",
"/docs/Web/",
"/search",
"/search/",
"/search?q=video",
"/search/?q=video",
"/events",
"/profile",
"/profiles/sheppy",
"/users/signin",
"/promote",
"/account",
"/docs/Web/HTML",
"/docs/Learn/CSS/Styling_text/Fundamentals#Color",
],
)
def test_locale_selection(base_url, slug, expected, cookie, accept):
Expand All @@ -151,4 +154,7 @@ def test_locale_selection(base_url, slug, expected, cookie, accept):
request_kwargs["cookies"] = {"preferredlocale": cookie}
response = request("get", url, **request_kwargs)
assert response.status_code == 302
assert response.headers["location"].startswith(f"/{expected}/")
extra = "?".join(p.strip("/") for p in slug.split("?"))
assert response.headers["location"].startswith(
f"/{expected}/{extra}"
), f"{response.headers['location']} does not start with {f'/{expected}/{extra}'}"
12 changes: 9 additions & 3 deletions testing/integration/utils/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,13 @@ def assert_valid_url(
# so that the value will appear in locals in test output
resp_location = resp.headers.get("location")
if follow_redirects:
assert resp.status_code == final_status_code
assert (
resp.status_code == final_status_code
), f"got {resp.status_code}, expected {final_status_code}"
else:
assert resp.status_code == status_code
assert (
resp.status_code == status_code
), f"got {resp.status_code}, expected {status_code}"
if location and not follow_redirects:
if query:
# all query values must be lists
Expand All @@ -139,7 +143,9 @@ def assert_valid_url(
# strip off query for further comparison
resp_location = resp_location.split("?")[0]

assert location == unquote(resp_location)
assert location == unquote(
resp_location
), f"got {unquote(resp_location)}, expected {location}"

if resp_headers and not follow_redirects:

Expand Down

0 comments on commit 62ec395

Please sign in to comment.