Skip to content

Commit

Permalink
Add --service-worker option
Browse files Browse the repository at this point in the history
This can be used to load and serve a replay service worker. Initially, this is compatible with wabac.js, but in the future we could support other replay service worker implementations, such as reconstructive.js, too.
  • Loading branch information
ato committed Sep 4, 2024
1 parent a97e9eb commit e8d4a66
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 10 deletions.
49 changes: 49 additions & 0 deletions resources/outbackcdx/replay.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/**
 * Bootstraps client-side replay for a page served at
 * /<collection>/<timestamp>/<url>.
 *
 * Registers the service worker at /sw.js, asks it to add the collection,
 * waits for its 'collAdded' reply and then embeds the replayed page in a
 * full-window iframe. Title and URL updates posted by the replayed frame
 * are mirrored onto this top-level document.
 *
 * @returns {Promise<void>} resolves once the iframe has been appended.
 */
async function init() {
    const path = window.location.pathname;
    const segments = path.split("/");
    const collection = segments[1];
    const timestamp = segments[2];
    // Everything after "/<collection>/<timestamp>/" is the archived URL.
    // location.pathname excludes the query string, so append it explicitly;
    // otherwise "?a=b" on the archived URL would be silently dropped.
    const url = path.substring(collection.length + timestamp.length + 3) + window.location.search;

    // Subscribe before posting 'addColl' so the reply cannot be missed.
    const collAdded = new Promise((resolve) => {
        navigator.serviceWorker.addEventListener('message', (event) => {
            if (event.data.msg_type === 'collAdded') {
                resolve();
            }
        });
    });

    await navigator.serviceWorker.register("/sw.js");

    // navigator.serviceWorker.controller is null while this page is not yet
    // controlled (e.g. first visit or a forced reload). Fall back to the
    // registration's active worker and post to whichever one we obtained,
    // instead of unconditionally dereferencing `controller`.
    const worker = navigator.serviceWorker.controller
        || (await navigator.serviceWorker.ready).active;
    if (worker) {
        // NOTE(review): message shape presumably follows the wabac.js
        // protocol; confirm against the configured service worker.
        worker.postMessage({
            msg_type: 'addColl',
            name: collection,
            type: 'live',
            file: {'sourceUrl': 'proxy:'},
            extraConfig: {
                baseUrl: window.location.href,
                isLive: false,
                archivePrefix: '/' + collection + '/',
            }
        });
    }

    // Keep the outer document's title and address bar in sync with the
    // page shown inside the replay iframe.
    window.addEventListener('message', (event) => {
        const data = event.data;
        if (data.wb_type === 'title') {
            document.title = data.title;
        } else if (data.wb_type === 'replace-url') {
            history.replaceState({}, data.title, '/' + collection + '/' + data.ts + '/' + data.url);
        }
    });

    await collAdded;

    const style = document.createElement("style");
    style.innerHTML = 'html, body, iframe { margin:0; padding:0; width: 100%; height: 100% }';
    const iframe = document.createElement('iframe');
    iframe.src = '/w/' + collection + '/' + timestamp + 'mp_/' + url;
    document.body.append(style, iframe);
}

init();
17 changes: 15 additions & 2 deletions src/outbackcdx/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import java.net.*;
import java.nio.channels.Channel;
import java.nio.channels.ServerSocketChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand All @@ -39,8 +41,9 @@ public static void usage() {
System.err.println(" --max-num-results N Max number of records to scan to calculate numresults statistic in the XML protocol (default 10000)");
System.err.println(" --omit-self-redirects Omit self redirects from query results by default");
System.err.println(" -p port Local port to listen on");
System.err.println(" -t count Number of web server threads");
System.err.println(" -r count Cap on number of rocksdb records to scan to serve a single request");
System.err.println(" --service-worker FILE Sets a JavaScript file to use as the replay service worker");
System.err.println(" -t count Number of web server threads");
System.err.println(" --warc-base-url URL Enables replay of WARC records by reading WARC files with this URL prefix");
System.err.println(" -x Output CDX14 by default (instead of CDX11)");
System.err.println(" -v Verbose logging");
Expand Down Expand Up @@ -85,6 +88,7 @@ public static void main(String[] args) {
Map<String,ComputedField> computedFields = new HashMap<>();
QueryConfig queryConfig = new QueryConfig();
String warcBaseUrl = null;
String serviceWorker = null;

Map<String,Object> dashboardConfig = new HashMap<>();
dashboardConfig.put("featureFlags", FeatureFlags.asMap());
Expand Down Expand Up @@ -168,6 +172,15 @@ public static void main(String[] args) {
case "--batch-size":
batchSize = Long.parseLong(args[++i]);
break;
case "--service-worker":
Path path = Path.of(args[++i]);
try {
serviceWorker = Files.readString(path);
} catch (IOException e) {
System.err.println("Error reading service worker from " + path + ": " + e);
System.exit(1);
}
break;
case "--warc-base-url":
warcBaseUrl = args[++i];
break;
Expand All @@ -190,7 +203,7 @@ public static void main(String[] args) {
replay = new Replay(warcBaseUrl);
}
try (DataStore dataStore = new DataStore(dataPath, maxOpenSstFiles, replicationWindow, scanCap, canonicalizer)) {
Webapp controller = new Webapp(dataStore, verbose, dashboardConfig, canonicalizer, computedFields, maxNumResults, queryConfig, replay);
Webapp controller = new Webapp(dataStore, verbose, dashboardConfig, canonicalizer, computedFields, maxNumResults, queryConfig, replay, serviceWorker);
if (undertow) {
UWeb.UServer server = new UWeb.UServer(host, port, contextPath, controller, authorizer);
server.start();
Expand Down
14 changes: 13 additions & 1 deletion src/outbackcdx/Replay.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.net.HttpURLConnection;
import java.net.URI;
import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.Locale;
import java.util.Set;
Expand All @@ -28,6 +29,16 @@ public Replay(String warcBaseUrl) {
this.warcBaseUrl = warcBaseUrl;
}

/**
 * Dispatches a replay request according to its URL modifier.
 *
 * @param modifier {@code "id_"} to return the capture via
 *                 {@link #replayIdentity}, or the empty string to return a
 *                 small HTML page that loads {@code /replay.js}
 * @throws IllegalArgumentException if {@code modifier} is any other value
 */
public Web.Response replay(Index index, String date, String url, String modifier, Web.Request request) throws IOException {
    switch (modifier) {
        case "id_":
            return replayIdentity(index, date, url, request);
        case "":
            // Bootstrap page: the script it loads takes over from here.
            return new Web.Response(200, "text/html", "<!doctype html><body><script src=/replay.js></script>");
        default:
            throw new IllegalArgumentException("modifier must be either id_ or empty");
    }
}

public Web.Response replayIdentity(Index index, String date, String url, Web.Request request) throws IOException {
Capture capture = findClosestCapture(index, date, url);
if (capture == null) return new Web.Response(NOT_FOUND, "text/plain", "Not in archive");
Expand All @@ -37,9 +48,10 @@ public Web.Response replayIdentity(Index index, String date, String url, Web.Req
WarcRecord record = warcReader.next().orElse(null);
if (record == null) throw new IOException("Missing WARC record");

OffsetDateTime captureDate = record.date().atOffset(ZoneOffset.UTC);
MultiMap<String, String> headers = new MultiMap<>();
headers.add("Access-Control-Allow-Origin", "*");
headers.add("Memento-Datetime", RFC_1123_DATE_TIME.format(record.date().atOffset(ZoneOffset.UTC)));
headers.add("Memento-Datetime", RFC_1123_DATE_TIME.format(captureDate));
if (record instanceof WarcResponse) {
HttpResponse http = ((WarcResponse) record).http();
http.headers().map().forEach((name, values) -> {
Expand Down
20 changes: 15 additions & 5 deletions src/outbackcdx/Webapp.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class Webapp implements Web.Handler {
private final long maxNumResults;
private final WbCdxApi wbCdxApi;
private final Replay replay;
private final String serviceWorker;

private static ServiceLoader<FilterPlugin> fpLoader = ServiceLoader.load(FilterPlugin.class);

Expand All @@ -55,10 +56,11 @@ private Response deleteAccessRule(Web.Request req) throws IOException, Web.Respo
return found ? ok() : notFound();
}

Webapp(DataStore dataStore, boolean verbose, Map<String, Object> dashboardConfig, UrlCanonicalizer canonicalizer, Map<String, ComputedField> computedFields, long maxNumResults, QueryConfig queryConfig, Replay replay) {
Webapp(DataStore dataStore, boolean verbose, Map<String, Object> dashboardConfig, UrlCanonicalizer canonicalizer, Map<String, ComputedField> computedFields, long maxNumResults, QueryConfig queryConfig, Replay replay, String serviceWorker) {
this.dataStore = dataStore;
this.verbose = verbose;
this.dashboardConfig = dashboardConfig;
this.serviceWorker = serviceWorker;
if (canonicalizer == null) {
canonicalizer = new UrlCanonicalizer();
}
Expand Down Expand Up @@ -86,6 +88,8 @@ private Response deleteAccessRule(Web.Request req) throws IOException, Web.Respo
router.on(GET, "/database.svg", serve("database.svg"));
router.on(GET, "/outback.svg", serve("outback.svg"));
router.on(GET, "/favicon.ico", serve("outback.svg"));
router.on(GET, "/replay.js", serve("replay.js"));
router.on(GET, "/sw.js", this::serviceWorker);
router.on(GET, "/swagger.json", serve("swagger.json"));
router.on(GET, "/lib/vue-router/2.0.0/vue-router.js", serve("lib/vue-router/2.0.0/vue-router.js"));
router.on(GET, "/lib/vue/" + version("org.webjars.npm", "vue") + "/vue.js", serve("/META-INF/resources/webjars/vue/" + version("org.webjars.npm", "vue") + "/dist/vue.js"));
Expand All @@ -108,7 +112,7 @@ private Response deleteAccessRule(Web.Request req) throws IOException, Web.Respo
router.on(POST, "/<collection>/truncate_replication", request -> flushWal(request));
router.on(POST, "/<collection>/compact", request -> compact(request), Permission.INDEX_EDIT);
router.on(POST, "/<collection>/upgrade", request -> upgrade(request), Permission.INDEX_EDIT);
router.on(GET, "/<collection>/<date:[0-9]+>id_/<url:.*>", this::replayIdentity);
router.on(GET, "/<collection>/<date:[0-9]+><modifier:id_|>/<url:.*>", this::replay);

if (FeatureFlags.experimentalAccessControl()) {
router.on(GET, "/<collection>/ap/<accesspoint>", request -> query(request));
Expand Down Expand Up @@ -666,12 +670,18 @@ Response checkAccessBulk(Web.Request request) throws IOException, ResponseExcept
return jsonResponse(responses);
}

private Response replayIdentity(Request request) throws ResponseException, IOException {
if (replay == null) return new Response(404, "text/plain", "Replay not enabled (try setting --warc-base-url)");
private Response replay(Request request) throws ResponseException, IOException {
if (replay == null) return new Response(NOT_FOUND, "text/plain", "Replay not configured (try setting --warc-base-url)");
String date = request.param("date");
String url = request.param("url");
String modifier = request.param("modifier");
Index index = getIndex(request);
return replay.replayIdentity(index, date, url, request);
return replay.replay(index, date, url, modifier, request);
}

/**
 * Serves the configured service worker script as JavaScript, or a 404
 * plain-text hint when no script was configured.
 */
private Response serviceWorker(Request request) {
    if (serviceWorker != null) {
        return new Response(OK, "application/javascript", serviceWorker);
    }
    return new Response(NOT_FOUND, "text/plain", "Service worker not configured (try setting --service-worker)");
}

@Override
Expand Down
2 changes: 1 addition & 1 deletion test/outbackcdx/ReplicationFeaturesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public class ReplicationFeaturesTest {
public void setUp() throws IOException {
File root = folder.newFolder();
manager = new DataStore(root, 256, null, Long.MAX_VALUE, null);
webapp = new Webapp(manager, false, Collections.emptyMap(), null, Collections.emptyMap(), 10000, new QueryConfig(), null);
webapp = new Webapp(manager, false, Collections.emptyMap(), null, Collections.emptyMap(), 10000, new QueryConfig(), null, null);
}

@After
Expand Down
2 changes: 1 addition & 1 deletion test/outbackcdx/WebappTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void setUp() throws IOException, ConfigurationException {
UrlCanonicalizer canon = new UrlCanonicalizer(new ByteArrayInputStream(yaml.getBytes(UTF_8)));

DataStore manager = new DataStore(root, -1, null, Long.MAX_VALUE, canon);
webapp = new Webapp(manager, false, Collections.emptyMap(), canon, Collections.emptyMap(), 10000, new QueryConfig(), null);
webapp = new Webapp(manager, false, Collections.emptyMap(), canon, Collections.emptyMap(), 10000, new QueryConfig(), null, null);
}

@After
Expand Down

0 comments on commit e8d4a66

Please sign in to comment.