Skip to content

Commit

Permalink
Fix #1472: check attachments bundle (#1473)
Browse files Browse the repository at this point in the history
* WIP

* Add check for attachments bundles

* Remove useless change

* Remove leftover comment
  • Loading branch information
leplatrem authored Sep 12, 2024
1 parent dd3d493 commit 921f306
Show file tree
Hide file tree
Showing 3 changed files with 274 additions and 0 deletions.
119 changes: 119 additions & 0 deletions checks/remotesettings/attachments_bundles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
Verify freshness and validity of attachment bundles.
For each collection where the attachments bundle is enabled, return the modification timestamp and number of attachments bundled.
"""

import io
import logging
import urllib.parse
import zipfile
from typing import Any

from telescope.typings import CheckResult
from telescope.utils import (
ClientSession,
retry_decorator,
run_parallel,
utcfromhttpdate,
utcfromtimestamp,
)

from .utils import KintoClient, fetch_signed_resources


# Check parameters exposed in the troubleshooting output (presumably consumed
# by the Telescope UI/API; verify against the framework's conventions).
EXPOSED_PARAMETERS = ["server"]

logger = logging.getLogger(__name__)


@retry_decorator
async def fetch_binary(url: str, **kwargs) -> tuple[int, str, bytes]:
    """Download *url* and return ``(status, last_modified_header, body)``.

    Falls back to the Unix epoch HTTP date when the response carries no
    ``Last-Modified`` header. Extra keyword arguments are forwarded to
    ``session.get``.
    """
    human_url = urllib.parse.unquote(url)
    logger.debug(f"Fetch binary from '{human_url}'")
    async with ClientSession() as session:
        async with session.get(url, **kwargs) as response:
            last_modified = response.headers.get(
                "Last-Modified", "Mon, 01 Jan 1970 00:00:00 GMT"
            )
            body = await response.read()
            return response.status, last_modified, body


async def run(
    server: str, auth: str, margin_publication_hours: int = 12
) -> CheckResult:
    """Verify freshness and validity of published attachment bundles.

    For every signed collection whose source metadata enables attachment
    bundling, download ``{base_url}bundles/{bucket}--{collection}.zip``
    from the CDN and classify it:

    * ``missing``  -- the CDN answered with HTTP >= 400
    * ``bad zip``  -- the payload is not a valid ZIP archive
    * ``outdated`` -- the bundle is older than the collection records by
      more than ``margin_publication_hours``
    * ``ok``       -- otherwise (size, file count, timestamps included)

    Returns ``(success, details)``; ``success`` is ``False`` when any
    bundle is missing or corrupt.
    """
    client = KintoClient(server_url=server, auth=auth)
    resources = await fetch_signed_resources(server, auth)

    logger.debug("Fetch metadata of %s collections", len(resources))
    futures = [
        client.get_collection(
            bucket=resource["source"]["bucket"],
            id=resource["source"]["collection"],
        )
        for resource in resources
    ]
    sources_metadata = await run_parallel(*futures)
    resources_sources_metadata = zip(resources, sources_metadata)

    # Keep only collections whose metadata opts into attachment bundling.
    metadata_for_bundled = [
        (r, m)
        for r, m in resources_sources_metadata
        if m["data"].get("attachment", {}).get("bundle", False)
    ]
    logger.info("%s collections with attachments bundle", len(metadata_for_bundled))
    if not metadata_for_bundled:
        # Nothing is configured to be bundled: nothing to verify.
        # (Previously a bare `assert`, which is stripped under `python -O`
        # and otherwise crashed the check with an unhelpful AssertionError.)
        return True, {}
    records_timestamps = [
        resource["last_modified"] for resource, _ in metadata_for_bundled
    ]

    info = await client.server_info()
    base_url = info["capabilities"]["attachments"]["base_url"]

    # Bundles are published on the CDN as `{destination bucket}--{collection}.zip`.
    futures_bundles = []
    for resource, metadata in metadata_for_bundled:
        bid = resource["destination"]["bucket"]
        cid = metadata["data"]["id"]
        url = f"{base_url}bundles/{bid}--{cid}.zip"
        futures_bundles.append(fetch_binary(url))
    bundles = await run_parallel(*futures_bundles)

    timestamps_metadata_bundles = zip(records_timestamps, metadata_for_bundled, bundles)

    result: dict[str, dict[str, Any]] = {}
    success = True
    for timestamp, (resource, metadata), bundle in timestamps_metadata_bundles:
        http_status, modified, binary = bundle
        bid = resource["destination"]["bucket"]
        cid = metadata["data"]["id"]
        if http_status >= 400:
            result[f"{bid}/{cid}"] = {"status": "missing"}
            success = False
            continue

        try:
            z = zipfile.ZipFile(io.BytesIO(binary))
            nfiles = len(z.namelist())
        except zipfile.BadZipFile:
            result[f"{bid}/{cid}"] = {"status": "bad zip"}
            success = False
            continue

        bundle_ts = utcfromhttpdate(modified)
        records_ts = utcfromtimestamp(timestamp)
        # Tolerate a publication lag of `margin_publication_hours` between
        # the collection records timestamp and the bundle on the CDN.
        # NOTE(review): "outdated" does NOT flip `success` to False --
        # confirm this is intended (the test suite relies on it).
        status = (
            "outdated"
            if ((records_ts - bundle_ts).total_seconds() / 3600)
            > margin_publication_hours
            else "ok"
        )
        result[f"{bid}/{cid}"] = {
            "status": status,
            "size": len(binary),
            "attachments": nfiles,
            "publication_timestamp": bundle_ts.isoformat(),
            "collection_timestamp": records_ts.isoformat(),
        }

    return success, result
5 changes: 5 additions & 0 deletions telescope/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import email.utils
import json
import logging
import textwrap
Expand Down Expand Up @@ -164,6 +165,10 @@ def utcfromisoformat(iso8601):
return datetime.fromisoformat(iso8601_tz).replace(tzinfo=timezone.utc)


def utcfromhttpdate(httpdate):
    """Parse an HTTP date header (RFC 7231 IMF-fixdate) into an aware UTC datetime.

    Unlike the previous ``replace(tzinfo=timezone.utc)``, a non-UTC offset in
    the input (e.g. ``+0200``) is properly *converted* to UTC instead of being
    reinterpreted, which silently shifted the instant by the offset. Behavior
    is unchanged for the common ``GMT`` case.
    """
    parsed = email.utils.parsedate_to_datetime(httpdate)
    if parsed.tzinfo is None:
        # `parsedate_to_datetime` returns a naive datetime for the "-0000"
        # (unknown timezone) form; treat that as UTC.
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def render_checks(func):
async def wrapper(request):
# First, check that client requests supported output format.
Expand Down
150 changes: 150 additions & 0 deletions tests/checks/remotesettings/test_attachments_bundles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import io
import zipfile

from checks.remotesettings.attachments_bundles import run


# URL templates for the Kinto API endpoints mocked in these tests
# (placeholders: bucket id, collection id).
COLLECTION_URL = "/buckets/{}/collections/{}"
RECORDS_URL = "/buckets/{}/collections/{}/records"
CHANGESET_URL = "/buckets/{}/collections/{}/changeset"


def build_zip(num_files=3):
    """Return the bytes of an in-memory ZIP holding `num_files` 1 KB text files."""
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
        for index in range(num_files):
            archive.writestr(f"fake_file_{index}.txt", "x" * 1024)
    return buffer.getvalue()


async def test_negative(mock_responses, mock_aioresponses):
    """End-to-end check of `run()` covering every bundle outcome at once.

    Mocks a Kinto server with six collections, one per scenario: missing
    bundle, valid bundle ("ok"), corrupt zip, outdated bundle, a bundle
    published slightly late but within the freshness margin, and one with
    bundling disabled (which must not appear in the result at all).
    """
    server_url = "http://fake.local/v1"
    # Server root: advertises the CDN base URL and the signer resources
    # (here a single bucket-wide resource, `collection: None`).
    mock_responses.get(
        server_url + "/",
        payload={
            "capabilities": {
                "attachments": {"base_url": "http://cdn/"},
                "signer": {
                    "resources": [
                        {
                            "source": {"bucket": "main-workspace", "collection": None},
                            "preview": {"bucket": "main-preview", "collection": None},
                            "destination": {"bucket": "main", "collection": None},
                        }
                    ]
                },
            }
        },
    )
    # 1982-05-08T00:01:01Z expressed as epoch milliseconds, HTTP date, and ISO 8601.
    may8_ts = 389664061000
    may8_http = "Mon, 08 May 1982 00:01:01 GMT"
    may8_iso = "1982-05-08T00:01:01+00:00"

    # The monitor/changes endpoint lists each collection with its records
    # timestamp; "outdated" is > 24h newer than the bundle, "late" only 10min.
    changes_url = server_url + RECORDS_URL.format("monitor", "changes")
    mock_responses.get(
        changes_url,
        payload={
            "data": [
                {
                    "id": "abc",
                    "bucket": "main",
                    "collection": "missing",
                    "last_modified": may8_ts,
                },
                {
                    "id": "efg",
                    "bucket": "main",
                    "collection": "ok",
                    "last_modified": may8_ts,
                },
                {
                    "id": "hij",
                    "bucket": "main",
                    "collection": "badzip",
                    "last_modified": may8_ts,
                },
                {
                    "id": "klm",
                    "bucket": "main",
                    "collection": "outdated",
                    "last_modified": may8_ts + 24 * 3600 * 1000 + 60 * 1000,
                },
                {
                    "id": "nop",
                    "bucket": "main",
                    "collection": "late",
                    "last_modified": may8_ts + 600 * 1000,
                },
                {
                    "id": "qrs",
                    "bucket": "main",
                    "collection": "no-bundle",
                    "last_modified": may8_ts,
                },
            ]
        },
    )

    # Source collection metadata: all collections enable bundling except "no-bundle".
    for cid in ("missing", "ok", "badzip", "outdated", "late", "no-bundle"):
        mock_responses.get(
            server_url + COLLECTION_URL.format("main-workspace", cid),
            payload={
                "data": {
                    "id": cid,
                    "bucket": "main-workspace",
                    "attachment": {"bundle": cid != "no-bundle"},
                }
            },
        )

    # CDN responses: no mock for "no-bundle" (it must never be fetched).
    mock_aioresponses.get("http://cdn/bundles/main--missing.zip", status=404)
    mock_aioresponses.get(
        "http://cdn/bundles/main--ok.zip",
        body=build_zip(),
        headers={"Last-Modified": may8_http},
    )
    mock_aioresponses.get(
        "http://cdn/bundles/main--outdated.zip",
        body=build_zip(num_files=6),
        headers={"Last-Modified": may8_http},
    )
    mock_aioresponses.get(
        "http://cdn/bundles/main--late.zip",
        body=build_zip(num_files=6),
        headers={"Last-Modified": may8_http},
    )
    mock_aioresponses.get(
        "http://cdn/bundles/main--badzip.zip",
        body=b"boom",
        headers={"Last-Modified": may8_http},
    )

    status, data = await run(server_url, auth="")

    # Overall failure because of the "missing" and "badzip" entries;
    # note that "outdated" alone does not fail the check.
    assert status is False
    assert data == {
        "main/badzip": {"status": "bad zip"},
        "main/missing": {"status": "missing"},
        "main/ok": {
            "status": "ok",
            "attachments": 3,
            "collection_timestamp": "1982-05-08T00:01:01+00:00",
            "publication_timestamp": may8_iso,
            "size": 373,
        },
        "main/late": {
            "status": "ok",
            "attachments": 6,
            "collection_timestamp": "1982-05-08T00:11:01+00:00",
            "publication_timestamp": may8_iso,
            "size": 724,
        },
        "main/outdated": {
            "attachments": 6,
            "collection_timestamp": "1982-05-09T00:02:01+00:00",
            "publication_timestamp": may8_iso,
            "size": 724,
            "status": "outdated",
        },
    }

0 comments on commit 921f306

Please sign in to comment.