From a62c7f37d4132f3180bfef3e0d872d8e3cf87b5e Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Mon, 13 Jan 2025 16:31:01 +1100 Subject: [PATCH] Reuse HttpClient throughout a Connection session Enables http/2 connection reuse #2257 --- .../java/org/jsoup/helper/HttpConnection.java | 11 ++- .../org/jsoup/helper/HttpClientExecutor.java | 74 ++++++++++++------- 2 files changed, 55 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/jsoup/helper/HttpConnection.java b/src/main/java/org/jsoup/helper/HttpConnection.java index 05f2c58c6d..403a61465d 100644 --- a/src/main/java/org/jsoup/helper/HttpConnection.java +++ b/src/main/java/org/jsoup/helper/HttpConnection.java @@ -70,6 +70,11 @@ public class HttpConnection implements Connection { static final String DefaultUploadType = "application/octet-stream"; private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); + private HttpConnection.Request req; + private Connection.@Nullable Response res; + @Nullable Object client; // The HttpClient for this Connection, if via the HttpClientExecutor + @Nullable RequestAuthenticator lastAuth; // The previous Authenticator used by this Connection, if via the HttpClientExecutor + /** Create a new Connection, with the request URL specified. @param url the URL to fetch from @@ -97,6 +102,7 @@ public static Connection connect(URL url) { */ public HttpConnection() { req = new Request(); + req.connection = this; } /** @@ -112,9 +118,6 @@ static String encodeMimeName(String val) { return val.replace("\"", "%22"); } - private HttpConnection.Request req; - private Connection.@Nullable Response res; - @Override public Connection newRequest() { // copy the prototype request for the different settings, cookie manager, etc @@ -593,6 +596,7 @@ public static class Request extends HttpConnection.Base impl // make sure that we can send Sec-Fetch-Site headers etc. } + HttpConnection connection; private @Nullable Proxy proxy; private int timeoutMilliseconds; private int maxBodySizeBytes; @@ -627,6 +631,7 @@ public static class Request extends HttpConnection.Base impl Request(Request copy) { super(copy); + connection = copy.connection; proxy = copy.proxy; postDataCharset = copy.postDataCharset; timeoutMilliseconds = copy.timeoutMilliseconds; diff --git a/src/main/java11/org/jsoup/helper/HttpClientExecutor.java b/src/main/java11/org/jsoup/helper/HttpClientExecutor.java index 0b29c594ad..b7ed2955de 100644 --- a/src/main/java11/org/jsoup/helper/HttpClientExecutor.java +++ b/src/main/java11/org/jsoup/helper/HttpClientExecutor.java @@ -17,8 +17,8 @@ import java.net.http.HttpResponse; import java.time.Duration; import java.util.ArrayList; +import java.util.Collections; import java.util.List; -import java.util.Map; import static org.jsoup.helper.HttpConnection.Response; import static org.jsoup.helper.HttpConnection.Response.writePost; @@ -28,6 +28,11 @@ property {@code jsoup.useHttpClient} to {@code true}. */ class HttpClientExecutor extends RequestExecutor { + // HttpClient expects proxy settings per client; we do per request, so held as a thread local. Can't do same for + // auth because that callback is on a worker thread, so can only do auth per Connection. So we create a new client + // if the authenticator is different between requests + static ThreadLocal perRequestProxy = new ThreadLocal<>(); + @Nullable HttpResponse hRes; @@ -35,21 +40,31 @@ public HttpClientExecutor(HttpConnection.Request request, HttpConnection.@Nullab super(request, previousResponse); } + /** + Retrieve the HttpClient from the Connection, or create a new one. Allows for connection pooling of requests in the + same Connection (session). + */ + HttpClient client() { + // we try to reuse the same Client across requests in a given Connection; but if the request auth has changed, we need to create a new client + RequestAuthenticator prevAuth = req.connection.lastAuth; + req.connection.lastAuth = req.authenticator; + if (req.connection.client != null && prevAuth == req.authenticator) { // might both be null + return (HttpClient) req.connection.client; + } + + HttpClient.Builder builder = HttpClient.newBuilder(); + builder.followRedirects(HttpClient.Redirect.NEVER); // customized redirects + builder.proxy(new ProxyWrap()); // thread local impl for per request; called on executing thread + if (req.authenticator != null) builder.authenticator(new AuthenticationHandler(req.authenticator)); + + HttpClient client = builder.build(); + req.connection.client = client; + return client; + } + @Override HttpConnection.Response execute() throws IOException { try { - HttpClient.Builder builder = HttpClient.newBuilder(); - Proxy proxy = req.proxy(); - if (proxy != null) builder.proxy(new ProxyWrap(proxy)); - builder.followRedirects(HttpClient.Redirect.NEVER); // customized redirects - //builder.connectTimeout(Duration.ofMillis(req.timeout()/2)); // jsoup timeout is total connect + all reads - // todo - how to handle socketfactory? HttpClient wants SSLContext... - if (req.authenticator != null) { - AuthenticationHandler.AuthShim handler = new RequestAuthHandler(); - handler.enable(req.authenticator, builder); - } - HttpClient client = builder.build(); - HttpRequest.Builder reqBuilder = HttpRequest.newBuilder(req.url.toURI()).method(req.method.name(), requestBody(req)); if (req.timeout() > 0) reqBuilder.timeout( @@ -57,13 +72,13 @@ HttpConnection.Response execute() throws IOException { CookieUtil.applyCookiesToRequest(req, reqBuilder::header); // headers: - for (Map.Entry> header : req.multiHeaders().entrySet()) { - for (String value : header.getValue()) { - reqBuilder.header(header.getKey(), value); - } - } + req.multiHeaders().forEach((key, values) -> { + values.forEach(value -> reqBuilder.header(key, value)); + }); + if (req.proxy() != null) perRequestProxy.set(req.proxy()); // set up per request proxy HttpRequest hReq = reqBuilder.build(); + HttpClient client = client(); hRes = client.send(hReq, HttpResponse.BodyHandlers.ofInputStream()); HttpHeaders headers = hRes.headers(); @@ -84,9 +99,13 @@ HttpConnection.Response execute() throws IOException { throw e; } catch (InterruptedException e) { safeClose(); + Thread.currentThread().interrupt(); throw new IOException(e); } catch (URISyntaxException e) { throw new IllegalArgumentException("Malformed URL: " + req.url, e); + } finally { + // detach per request proxy + perRequestProxy.remove(); } } @@ -99,8 +118,12 @@ InputStream responseBody() throws IOException { @Override void safeClose() { if (hRes != null) { - // no real closer - // todo - review + InputStream body = hRes.body(); + if (body != null) { + try { + body.close(); + } catch (IOException ignored) {} + } hRes = null; } } @@ -116,16 +139,13 @@ static HttpRequest.BodyPublisher requestBody(final HttpConnection.Request req) t } static class ProxyWrap extends ProxySelector { - final List proxies; - - public ProxyWrap(Proxy proxy) { - this.proxies = new ArrayList<>(1); - proxies.add(proxy); - } + // empty list for no proxy: + static final List NoProxy = new ArrayList<>(0); @Override public List select(URI uri) { - return proxies; + Proxy proxy = perRequestProxy.get(); + return proxy != null ? Collections.singletonList(proxy) : NoProxy; } @Override