Skip to content

Commit

Permalink
Reuse HttpClient throughout a Connection session
Browse files Browse the repository at this point in the history
Enables http/2 connection reuse

#2257
  • Loading branch information
jhy committed Jan 13, 2025
1 parent 6aa1b71 commit a62c7f3
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 30 deletions.
11 changes: 8 additions & 3 deletions src/main/java/org/jsoup/helper/HttpConnection.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ public class HttpConnection implements Connection {
static final String DefaultUploadType = "application/octet-stream";
private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

private HttpConnection.Request req;
private Connection.@Nullable Response res;
@Nullable Object client; // The HttpClient for this Connection, if via the HttpClientExecutor
@Nullable RequestAuthenticator lastAuth; // The previous Authenticator used by this Connection, if via the HttpClientExecutor

/**
Create a new Connection, with the request URL specified.
@param url the URL to fetch from
Expand Down Expand Up @@ -97,6 +102,7 @@ public static Connection connect(URL url) {
*/
public HttpConnection() {
req = new Request();
req.connection = this;
}

/**
Expand All @@ -112,9 +118,6 @@ static String encodeMimeName(String val) {
return val.replace("\"", "%22");
}

private HttpConnection.Request req;
private Connection.@Nullable Response res;

@Override
public Connection newRequest() {
// copy the prototype request for the different settings, cookie manager, etc
Expand Down Expand Up @@ -593,6 +596,7 @@ public static class Request extends HttpConnection.Base<Connection.Request> impl
// make sure that we can send Sec-Fetch-Site headers etc.
}

HttpConnection connection;
private @Nullable Proxy proxy;
private int timeoutMilliseconds;
private int maxBodySizeBytes;
Expand Down Expand Up @@ -627,6 +631,7 @@ public static class Request extends HttpConnection.Base<Connection.Request> impl

Request(Request copy) {
super(copy);
connection = copy.connection;
proxy = copy.proxy;
postDataCharset = copy.postDataCharset;
timeoutMilliseconds = copy.timeoutMilliseconds;
Expand Down
74 changes: 47 additions & 27 deletions src/main/java11/org/jsoup/helper/HttpClientExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import static org.jsoup.helper.HttpConnection.Response;
import static org.jsoup.helper.HttpConnection.Response.writePost;
Expand All @@ -28,42 +28,57 @@
property {@code jsoup.useHttpClient} to {@code true}.
*/
class HttpClientExecutor extends RequestExecutor {
// HttpClient expects proxy settings per client, but we set them per request, so the proxy is held in a thread local.
// We can't do the same for auth, because that callback runs on a worker thread; auth can only be set per Connection.
// So we create a new client if the authenticator differs between requests.
static ThreadLocal<Proxy> perRequestProxy = new ThreadLocal<>();

@Nullable
HttpResponse<InputStream> hRes;

public HttpClientExecutor(HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse) {
super(request, previousResponse);
}

/**
 Get the HttpClient held on the owning Connection, building and caching a new one when none exists yet, or when this
 request's authenticator is not the same instance as the one used by the previous request. Reusing the client allows
 for connection pooling of requests in the same Connection (session).
 @return the client to execute this request with
 */
HttpClient client() {
    final HttpConnection session = req.connection;
    final RequestAuthenticator currentAuth = req.authenticator;

    // compare against the previous authenticator (identity; both may be null) before recording the current one
    final boolean sameAuth = session.lastAuth == currentAuth;
    session.lastAuth = currentAuth;

    final Object cached = session.client;
    if (sameAuth && cached != null) return (HttpClient) cached;

    // no reusable client: configure a fresh one
    HttpClient.Builder config = HttpClient.newBuilder()
        .followRedirects(HttpClient.Redirect.NEVER) // customized redirects
        .proxy(new ProxyWrap()); // thread local impl for per request; called on executing thread
    if (currentAuth != null) config.authenticator(new AuthenticationHandler(currentAuth));

    final HttpClient created = config.build();
    session.client = created; // keep on the Connection so later requests can reuse it
    return created;
}

@Override
HttpConnection.Response execute() throws IOException {
try {
HttpClient.Builder builder = HttpClient.newBuilder();
Proxy proxy = req.proxy();
if (proxy != null) builder.proxy(new ProxyWrap(proxy));
builder.followRedirects(HttpClient.Redirect.NEVER); // customized redirects
//builder.connectTimeout(Duration.ofMillis(req.timeout()/2)); // jsoup timeout is total connect + all reads
// todo - how to handle socketfactory? HttpClient wants SSLContext...
if (req.authenticator != null) {
AuthenticationHandler.AuthShim handler = new RequestAuthHandler();
handler.enable(req.authenticator, builder);
}
HttpClient client = builder.build();

HttpRequest.Builder reqBuilder =
HttpRequest.newBuilder(req.url.toURI()).method(req.method.name(), requestBody(req));
if (req.timeout() > 0) reqBuilder.timeout(
Duration.ofMillis(req.timeout())); // infinite if unset (UrlConnection / jsoup uses 0 for same)
CookieUtil.applyCookiesToRequest(req, reqBuilder::header);

// headers:
for (Map.Entry<String, List<String>> header : req.multiHeaders().entrySet()) {
for (String value : header.getValue()) {
reqBuilder.header(header.getKey(), value);
}
}
req.multiHeaders().forEach((key, values) -> {
values.forEach(value -> reqBuilder.header(key, value));
});

if (req.proxy() != null) perRequestProxy.set(req.proxy()); // set up per request proxy
HttpRequest hReq = reqBuilder.build();
HttpClient client = client();
hRes = client.send(hReq, HttpResponse.BodyHandlers.ofInputStream());
HttpHeaders headers = hRes.headers();

Expand All @@ -84,9 +99,13 @@ HttpConnection.Response execute() throws IOException {
throw e;
} catch (InterruptedException e) {
safeClose();
Thread.currentThread().interrupt();
throw new IOException(e);
} catch (URISyntaxException e) {
throw new IllegalArgumentException("Malformed URL: " + req.url, e);
} finally {
// detach per request proxy
perRequestProxy.remove();
}
}

Expand All @@ -99,8 +118,12 @@ InputStream responseBody() throws IOException {
@Override
void safeClose() {
    if (hRes == null) return; // nothing held; nothing to release

    // close the response body stream (if there is one), then drop the response reference
    InputStream stream = hRes.body();
    try {
        if (stream != null) stream.close();
    } catch (IOException ignored) {
        // best-effort close: a failure here is deliberately not propagated
    }
    hRes = null;
}
Expand All @@ -116,16 +139,13 @@ static HttpRequest.BodyPublisher requestBody(final HttpConnection.Request req) t
}

static class ProxyWrap extends ProxySelector {
final List<Proxy> proxies;

public ProxyWrap(Proxy proxy) {
this.proxies = new ArrayList<>(1);
proxies.add(proxy);
}
// empty list for no proxy:
static final List<Proxy> NoProxy = new ArrayList<>(0);

@Override
public List<Proxy> select(URI uri) {
return proxies;
Proxy proxy = perRequestProxy.get();
return proxy != null ? Collections.singletonList(proxy) : NoProxy;
}

@Override
Expand Down

0 comments on commit a62c7f3

Please sign in to comment.