/**
 * Splits a replay page path of the form `/<collection>/<timestamp>/<url>`
 * into its parts.
 *
 * @param {string} pathname - Value of `location.pathname` for the replay page.
 * @param {string} [suffix=''] - Text appended verbatim to the extracted URL
 *     (the page's query string and fragment, which `pathname` does not carry).
 * @returns {{collection: string, timestamp: string, url: string}}
 * @throws {Error} If the path has no collection or timestamp segment.
 */
function parseReplayPath(pathname, suffix = '') {
    const segments = pathname.split('/');
    const collection = segments[1];
    const timestamp = segments[2];
    if (collection === undefined || timestamp === undefined) {
        throw new Error(`Unexpected replay path: ${pathname}`);
    }
    // Skip "/<collection>/<timestamp>/": both segments plus three slashes.
    const url = pathname.substring(collection.length + timestamp.length + 3) + suffix;
    return { collection, timestamp, url };
}

/**
 * Boots the replay page: registers the service worker at `/sw.js`, asks it to
 * add the collection named in the page path, mirrors title/URL updates posted
 * to this window, and finally embeds the replayed page in a full-size iframe.
 *
 * @returns {Promise<void>} Resolves once the iframe has been appended.
 * @throws {Error} If the path is malformed or no service worker is available.
 */
async function init() {
    const { pathname, search, hash, href } = window.location;
    // The query string and fragment belong to the archived URL, so carry them along.
    const { collection, timestamp, url } = parseReplayPath(pathname, search + hash);

    // Start listening before the request is posted so the reply cannot be missed.
    const collAdded = new Promise((resolve) => {
        navigator.serviceWorker.addEventListener('message', (event) => {
            const data = event.data || {};
            if (data.msg_type === 'collAdded') {
                resolve();
            }
        });
    });

    await navigator.serviceWorker.register('/sw.js');

    // `controller` is null when this page is not (yet) controlled by the worker,
    // e.g. on the first visit; fall back to the registration's active worker
    // and post to whichever one was found.
    const worker = navigator.serviceWorker.controller
        || (await navigator.serviceWorker.ready).active;
    if (!worker) {
        throw new Error('No active service worker available for replay');
    }
    worker.postMessage({
        msg_type: 'addColl',
        name: collection,
        type: 'live',
        file: { sourceUrl: 'proxy:' },
        extraConfig: {
            baseUrl: href,
            isLive: false,
            archivePrefix: `/${collection}/`,
        },
    });

    // Keep the title and address bar in sync with messages posted to this window
    // (presumably by the replayed frame - confirm against the service worker).
    window.addEventListener('message', (event) => {
        const data = event.data || {};
        if (data.wb_type === 'title') {
            document.title = data.title;
        } else if (data.wb_type === 'replace-url') {
            history.replaceState({}, data.title, `/${collection}/${data.ts}/${data.url}`);
        }
    });

    await collAdded;

    const style = document.createElement('style');
    style.textContent = 'html, body, iframe { margin:0; padding:0; width: 100%; height: 100% }';
    const iframe = document.createElement('iframe');
    // NOTE(review): the "/w/" prefix and "mp_" modifier are assumed to be handled
    // by the service worker - confirm against its routing.
    iframe.src = `/w/${collection}/${timestamp}mp_/${url}`;
    document.body.append(style, iframe);
}

// Boot only where a window exists so the helpers can be loaded elsewhere
// (for example under a test runner) without side effects, and never leave
// the returned promise unhandled.
if (typeof window !== 'undefined') {
    init().catch((err) => {
        console.error('Replay initialisation failed', err);
    });
}
java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -39,8 +41,9 @@ public static void usage() { System.err.println(" --max-num-results N Max number of records to scan to calculate numresults statistic in the XML protocol (default 10000)"); System.err.println(" --omit-self-redirects Omit self redirects from query results by default"); System.err.println(" -p port Local port to listen on"); - System.err.println(" -t count Number of web server threads"); System.err.println(" -r count Cap on number of rocksdb records to scan to serve a single request"); + System.err.println(" --service-worker FILE Sets a JavaScript file to use as the replay service worker"); + System.err.println(" -t count Number of web server threads"); System.err.println(" --warc-base-url URL Enables replay of WARC records by reading WARC files with this URL prefix"); System.err.println(" -x Output CDX14 by default (instead of CDX11)"); System.err.println(" -v Verbose logging"); @@ -85,6 +88,7 @@ public static void main(String[] args) { Map computedFields = new HashMap<>(); QueryConfig queryConfig = new QueryConfig(); String warcBaseUrl = null; + String serviceWorker = null; Map dashboardConfig = new HashMap<>(); dashboardConfig.put("featureFlags", FeatureFlags.asMap()); @@ -168,6 +172,15 @@ public static void main(String[] args) { case "--batch-size": batchSize = Long.parseLong(args[++i]); break; + case "--service-worker": + Path path = Path.of(args[++i]); + try { + serviceWorker = Files.readString(path); + } catch (IOException e) { + System.err.println("Error reading service worker from " + path + ": " + e); + System.exit(1); + } + break; case "--warc-base-url": warcBaseUrl = args[++i]; break; @@ -190,7 +203,7 @@ public static void main(String[] args) { replay = new Replay(warcBaseUrl); } try (DataStore dataStore = new DataStore(dataPath, maxOpenSstFiles, replicationWindow, scanCap, canonicalizer)) { - Webapp controller = new Webapp(dataStore, verbose, dashboardConfig, 
canonicalizer, computedFields, maxNumResults, queryConfig, replay); + Webapp controller = new Webapp(dataStore, verbose, dashboardConfig, canonicalizer, computedFields, maxNumResults, queryConfig, replay, serviceWorker); if (undertow) { UWeb.UServer server = new UWeb.UServer(host, port, contextPath, controller, authorizer); server.start(); diff --git a/src/outbackcdx/Replay.java b/src/outbackcdx/Replay.java index 9204649..aca6f7e 100644 --- a/src/outbackcdx/Replay.java +++ b/src/outbackcdx/Replay.java @@ -7,6 +7,7 @@ import java.net.HttpURLConnection; import java.net.URI; import java.nio.file.Path; +import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.util.Locale; import java.util.Set; @@ -28,6 +29,16 @@ public Replay(String warcBaseUrl) { this.warcBaseUrl = warcBaseUrl; } + public Web.Response replay(Index index, String date, String url, String modifier, Web.Request request) throws IOException { + if (modifier.equals("id_")) { + return replayIdentity(index, date, url, request); + } else if (modifier.isEmpty()) { + return new Web.Response(200, "text/html", ""); + } else { + throw new IllegalArgumentException("modifier must be either id_ or empty"); + } + } + public Web.Response replayIdentity(Index index, String date, String url, Web.Request request) throws IOException { Capture capture = findClosestCapture(index, date, url); if (capture == null) return new Web.Response(NOT_FOUND, "text/plain", "Not in archive"); @@ -37,9 +48,10 @@ public Web.Response replayIdentity(Index index, String date, String url, Web.Req WarcRecord record = warcReader.next().orElse(null); if (record == null) throw new IOException("Missing WARC record"); + OffsetDateTime captureDate = record.date().atOffset(ZoneOffset.UTC); MultiMap headers = new MultiMap<>(); headers.add("Access-Control-Allow-Origin", "*"); - headers.add("Memento-Datetime", RFC_1123_DATE_TIME.format(record.date().atOffset(ZoneOffset.UTC))); + headers.add("Memento-Datetime", 
RFC_1123_DATE_TIME.format(captureDate)); if (record instanceof WarcResponse) { HttpResponse http = ((WarcResponse) record).http(); http.headers().map().forEach((name, values) -> { diff --git a/src/outbackcdx/Webapp.java b/src/outbackcdx/Webapp.java index e27e390..2529a47 100644 --- a/src/outbackcdx/Webapp.java +++ b/src/outbackcdx/Webapp.java @@ -38,6 +38,7 @@ class Webapp implements Web.Handler { private final long maxNumResults; private final WbCdxApi wbCdxApi; private final Replay replay; + private final String serviceWorker; private static ServiceLoader fpLoader = ServiceLoader.load(FilterPlugin.class); @@ -55,10 +56,11 @@ private Response deleteAccessRule(Web.Request req) throws IOException, Web.Respo return found ? ok() : notFound(); } - Webapp(DataStore dataStore, boolean verbose, Map dashboardConfig, UrlCanonicalizer canonicalizer, Map computedFields, long maxNumResults, QueryConfig queryConfig, Replay replay) { + Webapp(DataStore dataStore, boolean verbose, Map dashboardConfig, UrlCanonicalizer canonicalizer, Map computedFields, long maxNumResults, QueryConfig queryConfig, Replay replay, String serviceWorker) { this.dataStore = dataStore; this.verbose = verbose; this.dashboardConfig = dashboardConfig; + this.serviceWorker = serviceWorker; if (canonicalizer == null) { canonicalizer = new UrlCanonicalizer(); } @@ -86,6 +88,8 @@ private Response deleteAccessRule(Web.Request req) throws IOException, Web.Respo router.on(GET, "/database.svg", serve("database.svg")); router.on(GET, "/outback.svg", serve("outback.svg")); router.on(GET, "/favicon.ico", serve("outback.svg")); + router.on(GET, "/replay.js", serve("replay.js")); + router.on(GET, "/sw.js", this::serviceWorker); router.on(GET, "/swagger.json", serve("swagger.json")); router.on(GET, "/lib/vue-router/2.0.0/vue-router.js", serve("lib/vue-router/2.0.0/vue-router.js")); router.on(GET, "/lib/vue/" + version("org.webjars.npm", "vue") + "/vue.js", serve("/META-INF/resources/webjars/vue/" + 
version("org.webjars.npm", "vue") + "/dist/vue.js")); @@ -108,7 +112,7 @@ private Response deleteAccessRule(Web.Request req) throws IOException, Web.Respo router.on(POST, "//truncate_replication", request -> flushWal(request)); router.on(POST, "//compact", request -> compact(request), Permission.INDEX_EDIT); router.on(POST, "//upgrade", request -> upgrade(request), Permission.INDEX_EDIT); - router.on(GET, "//id_/", this::replayIdentity); + router.on(GET, "///", this::replay); if (FeatureFlags.experimentalAccessControl()) { router.on(GET, "//ap/", request -> query(request)); @@ -666,12 +670,18 @@ Response checkAccessBulk(Web.Request request) throws IOException, ResponseExcept return jsonResponse(responses); } - private Response replayIdentity(Request request) throws ResponseException, IOException { - if (replay == null) return new Response(404, "text/plain", "Replay not enabled (try setting --warc-base-url)"); + private Response replay(Request request) throws ResponseException, IOException { + if (replay == null) return new Response(NOT_FOUND, "text/plain", "Replay not configured (try setting --warc-base-url)"); String date = request.param("date"); String url = request.param("url"); + String modifier = request.param("modifier"); Index index = getIndex(request); - return replay.replayIdentity(index, date, url, request); + return replay.replay(index, date, url, modifier, request); + } + + private Response serviceWorker(Request request) { + if (serviceWorker == null) return new Response(NOT_FOUND, "text/plain", "Service worker not configured (try setting --service-worker)"); + return new Response(OK, "application/javascript", serviceWorker); } @Override diff --git a/test/outbackcdx/ReplicationFeaturesTest.java b/test/outbackcdx/ReplicationFeaturesTest.java index 7c4a76a..281de96 100644 --- a/test/outbackcdx/ReplicationFeaturesTest.java +++ b/test/outbackcdx/ReplicationFeaturesTest.java @@ -28,7 +28,7 @@ public class ReplicationFeaturesTest { public void setUp() throws 
IOException { File root = folder.newFolder(); manager = new DataStore(root, 256, null, Long.MAX_VALUE, null); - webapp = new Webapp(manager, false, Collections.emptyMap(), null, Collections.emptyMap(), 10000, new QueryConfig(), null); + webapp = new Webapp(manager, false, Collections.emptyMap(), null, Collections.emptyMap(), 10000, new QueryConfig(), null, null); } @After diff --git a/test/outbackcdx/WebappTest.java b/test/outbackcdx/WebappTest.java index 6c4f3d2..877629e 100644 --- a/test/outbackcdx/WebappTest.java +++ b/test/outbackcdx/WebappTest.java @@ -46,7 +46,7 @@ public void setUp() throws IOException, ConfigurationException { UrlCanonicalizer canon = new UrlCanonicalizer(new ByteArrayInputStream(yaml.getBytes(UTF_8))); DataStore manager = new DataStore(root, -1, null, Long.MAX_VALUE, canon); - webapp = new Webapp(manager, false, Collections.emptyMap(), canon, Collections.emptyMap(), 10000, new QueryConfig(), null); + webapp = new Webapp(manager, false, Collections.emptyMap(), canon, Collections.emptyMap(), 10000, new QueryConfig(), null, null); } @After