Skip to content

Commit

Permalink
Improve HTTP utils and tests
Browse files Browse the repository at this point in the history
Factored out from PR #793 (Basic support of SERVICE clause)
  • Loading branch information
Hannah Bast committed Feb 20, 2023
1 parent dec12f6 commit e7d3263
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 110 deletions.
32 changes: 25 additions & 7 deletions src/util/http/HttpClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ using tcp = boost::asio::ip::tcp;

// ____________________________________________________________________________
template <typename StreamType>
HttpClientImpl<StreamType>::HttpClientImpl(const std::string& host,
const std::string& port) {
HttpClientImpl<StreamType>::HttpClientImpl(std::string_view host,
std::string_view port) {
// IMPORTANT implementation note: Although we need only `stream_` later, it
// is important that we also keep `io_context_` and `ssl_context_` alive.
// Otherwise, we get a nasty and non-deterministic segmentation fault when
Expand All @@ -42,7 +42,8 @@ HttpClientImpl<StreamType>::HttpClientImpl(const std::string& host,
ssl_context_->set_verify_mode(ssl::verify_none);
tcp::resolver resolver{io_context_};
stream_ = std::make_unique<StreamType>(io_context_, *ssl_context_);
if (!SSL_set_tlsext_host_name(stream_->native_handle(), host.c_str())) {
if (!SSL_set_tlsext_host_name(stream_->native_handle(),
std::string{host}.c_str())) {
boost::system::error_code ec{static_cast<int>(::ERR_get_error()),
boost::asio::error::get_ssl_category()};
throw boost::system::system_error{ec};
Expand All @@ -59,7 +60,7 @@ HttpClientImpl<StreamType>::~HttpClientImpl() noexcept(false) {
boost::system::error_code ec;
if constexpr (std::is_same_v<StreamType, beast::tcp_stream>) {
stream_->socket().shutdown(tcp::socket::shutdown_both, ec);
// `not_connected happens sometimes, so don't bother reporting it.
// `not_connected` happens sometimes, so don't bother reporting it.
if (ec && ec != beast::errc::not_connected) {
if (std::uncaught_exceptions() == 0) {
throw beast::system_error{ec};
Expand All @@ -86,9 +87,9 @@ HttpClientImpl<StreamType>::~HttpClientImpl() noexcept(false) {
// ____________________________________________________________________________
template <typename StreamType>
std::istringstream HttpClientImpl<StreamType>::sendRequest(
const boost::beast::http::verb& method, const std::string& host,
const std::string& target, const std::string& requestBody,
const std::string& contentTypeHeader, const std::string& acceptHeader) {
const boost::beast::http::verb& method, std::string_view host,
std::string_view target, std::string_view requestBody,
std::string_view contentTypeHeader, std::string_view acceptHeader) {
// Check that we have a stream (obtained via a call to `openStream` above).
if (!stream_) {
throw std::runtime_error("Trying to send request without connection");
Expand Down Expand Up @@ -120,3 +121,20 @@ std::istringstream HttpClientImpl<StreamType>::sendRequest(
// Explicit instantiations for HTTP and HTTPS, see the bottom of `HttpClient.h`.
template class HttpClientImpl<beast::tcp_stream>;
template class HttpClientImpl<ssl::stream<tcp::socket>>;

// ____________________________________________________________________________
std::istringstream sendHttpOrHttpsRequest(
    ad_utility::httpUtils::Url url, const boost::beast::http::verb& method,
    std::string_view requestData, std::string_view contentTypeHeader,
    std::string_view acceptHeader) {
  using Protocol = ad_utility::httpUtils::Url::Protocol;
  // Connect to the host and port of `url` with the given client type
  // (`HttpClient` or `HttpsClient`), send the request, and hand back the
  // response. The client object lives only for this one request.
  auto requestVia = [&]<typename ClientType>() {
    ClientType connection{url.host(), url.port()};
    return connection.sendRequest(method, url.host(), url.target(),
                                  requestData, contentTypeHeader,
                                  acceptHeader);
  };
  // Everything that is not plain HTTP goes through the HTTPS client.
  if (url.protocol() != Protocol::HTTP) {
    return requestVia.operator()<HttpsClient>();
  }
  return requestVia.operator()<HttpClient>();
}
22 changes: 17 additions & 5 deletions src/util/http/HttpClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <sstream>
#include <string>

#include "util/http/HttpUtils.h"
#include "util/http/beast.h"

// A class for basic communication with a remote server via HTTP or HTTPS. For
Expand All @@ -31,7 +32,7 @@ template <typename StreamType>
class HttpClientImpl {
public:
// The constructor sets up the connection to the server.
HttpClientImpl(const std::string& host, const std::string& port);
HttpClientImpl(std::string_view host, std::string_view port);

// The destructor closes the connection.
~HttpClientImpl() noexcept(false);
Expand All @@ -44,10 +45,10 @@ class HttpClientImpl {
// TODO: Read and process the response in chunks. Here is a code example:
// https://stackoverflow.com/questions/69011767/handling-large-http-response-using-boostbeast
std::istringstream sendRequest(
const boost::beast::http::verb& method, const std::string& host,
const std::string& target, const std::string& requestBody = "",
const std::string& contentTypeHeader = "text/plain",
const std::string& acceptHeader = "text/plain");
const boost::beast::http::verb& method, std::string_view host,
std::string_view target, std::string_view requestBody = "",
std::string_view contentTypeHeader = "text/plain",
std::string_view acceptHeader = "text/plain");

private:
// The connection stream and associated objects. See the implementation of
Expand All @@ -63,3 +64,14 @@ using HttpClient = HttpClientImpl<boost::beast::tcp_stream>;
// Instantiation for HTTPS.
using HttpsClient =
HttpClientImpl<boost::asio::ssl::stream<boost::asio::ip::tcp::socket>>;

// Global convenience function for sending a request (default: GET) to the given
// URL and obtaining the result as a `std::istringstream`. The protocol (HTTP or
// HTTPS) is chosen automatically based on the URL. The `postData` is the
// payload sent for POST requests (default: empty).
std::istringstream sendHttpOrHttpsRequest(
ad_utility::httpUtils::Url url,
const boost::beast::http::verb& method = boost::beast::http::verb::get,
std::string_view postData = "",
std::string_view contentTypeHeader = "text/plain",
std::string_view acceptHeader = "text/plain");
17 changes: 10 additions & 7 deletions src/util/http/HttpUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,23 @@ static constexpr char urlRegexString[] =
static constexpr auto urlRegex = ctll::fixed_string(urlRegexString);

// ____________________________________________________________________________
UrlComponents::UrlComponents(const std::string_view url) {
Url::Url(const std::string_view url) {
auto match = ctre::search<urlRegex>(url);
if (!match) {
throw std::runtime_error(
absl::StrCat("URL malformed, must match regex ", urlRegexString));
}
protocol =
protocol_ =
match.get<1>().to_string() == "http" ? Protocol::HTTP : Protocol::HTTPS;
host = match.get<2>().to_string();
port = match.get<4>().to_string();
if (port.empty()) {
port = protocol == Protocol::HTTP ? "80" : "443";
host_ = match.get<2>().to_string();
port_ = match.get<4>().to_string();
if (port_.empty()) {
port_ = protocol_ == Protocol::HTTP ? "80" : "443";
}
target_ = match.get<5>().to_string();
if (target_.empty()) {
target_ = "/";
}
target = match.get<5>().to_string();
}

} // namespace ad_utility::httpUtils
52 changes: 33 additions & 19 deletions src/util/http/HttpUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include <string_view>

#include "absl/strings/str_cat.h"
#include "nlohmann/json.hpp"
#include "util/AsyncStream.h"
#include "util/CompressorStream.h"
#include "util/StringUtils.h"
Expand All @@ -19,6 +18,7 @@
#include "util/http/UrlParser.h"
#include "util/http/beast.h"
#include "util/http/streamable_body.h"
#include "util/json.h"
#include "util/stream_generator.h"

/// Several utilities for using/customizing the HttpServer template from
Expand All @@ -32,32 +32,46 @@ using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp>
namespace streams = ad_utility::streams;
using ad_utility::httpUtils::httpStreams::streamable_body;

/// The components of a URL. For example, the components of the URL
/// Simple URL class that provides "getters" for the various URL components
/// (some of the components are implicit, such as the port, so these are not
/// really getters). For example, the components of the URL
/// https://qlever.cs.uni-freiburg.de/api/wikidata are:
///
/// protocol: HTTPS
/// host: qlever.cs.uni-freiburg.de
/// port: 443 (implicit)
/// target: /api/wikidata .
///
/// NOTE: `host` and `target` could be `std::string_view` because they are parts
/// of the given URL. However, `port` can be implicit, so we need a
/// `std::string` here (and it's not an `int` because the Beast functions ask
/// for the port as a string). Since URLs are short and we do not handle large
/// numbers of URLs, the overhead of the string copies is negligible.
struct UrlComponents {
class Url {
public:
enum class Protocol { HTTP, HTTPS };

private:
Protocol protocol_;
std::string host_;
std::string port_;
std::string target_;

public:
// Construct from given URL.
UrlComponents(const std::string_view url);
// Members.
enum Protocol { HTTP, HTTPS } protocol;
std::string host;
std::string port;
std::string target;
// For testing.
friend std::ostream& operator<<(std::ostream& os, const UrlComponents& uc) {
return os << "UrlComponents("
<< (uc.protocol == Protocol::HTTP ? "http" : "https") << ", "
<< uc.host << ", " << uc.port << ", " << uc.target << ")";
Url(const std::string_view url);
// The protocol: one of Protocol::HTTP or Protocol::HTTPS.
Protocol protocol() const { return protocol_; }
// The host; this is always a substring of the given URL.
const std::string& host() const { return host_; }
// The port; inferred from the protocol if not specified explicitly (80 for
// HTTP, 443 for HTTPS).
const std::string& port() const { return port_; }
// The target; this is a substring of the given URL, except when it's empty in
// the URL, then it's "/".
const std::string& target() const { return target_; }
// The protocol as string.
std::string_view protocolAsString() const {
return protocol_ == Protocol::HTTP ? "http" : "https";
}
// The whole URL as a string again (with explicit port).
std::string asString() const {
return absl::StrCat(protocolAsString(), "://", host_, ":", port_, target_);
}
};

Expand Down
67 changes: 13 additions & 54 deletions test/HttpTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,20 @@
#include <chrono>
#include <thread>

#include "./HttpTestHelpers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "util/http/HttpClient.h"
#include "util/http/HttpServer.h"
#include "util/http/HttpUtils.h"

using namespace ad_utility::httpUtils;
using namespace std::literals;

TEST(HttpServer, HttpTest) {
// A simple HTTP session handler, which replies with three lines: the request
// method (GET, POST, or OTHER), a copy of the request target (might be
// empty), and a copy of the request body (might be empty).
auto mirroringHttpSessionHandler =
[](auto request, auto&& send) -> boost::asio::awaitable<void> {
// Create and run a HTTP server, which replies to each request with three
// lines: the request method (GET, POST, or OTHER), a copy of the request
// target (might be empty), and a copy of the request body (might be empty).
TestHttpServer httpServer([](auto request,
auto&& send) -> boost::asio::awaitable<void> {
std::string methodName;
switch (request.method()) {
case http::verb::get:
Expand All @@ -37,49 +36,8 @@ TEST(HttpServer, HttpTest) {
absl::StrCat(methodName, "\n", request.target(), "\n", request.body());
co_return co_await send(
createOkResponse(response, request, ad_utility::MediaType::textPlain));
};
// Set up a HTTP server and run it. Try out 10 different ports, if connection
// to all of them fail, the test fails.
//
// TODO: Is there a more robust way to do this? Should we try out more ports?
auto httpServer = [&mirroringHttpSessionHandler]() {
std::vector<short unsigned int> ports(10);
std::generate(ports.begin(), ports.end(),
[]() { return 1024 + std::rand() % (65535 - 1024); });
const std::string& ipAddress = "0.0.0.0";
int numServerThreads = 1;
for (const short unsigned int port : ports) {
try {
using Server = HttpServer<decltype(mirroringHttpSessionHandler)>;
return std::make_shared<Server>(port, ipAddress, numServerThreads,
mirroringHttpSessionHandler);
} catch (const boost::system::system_error& b) {
LOG(INFO) << "Starting test HTTP server on port " << port
<< " failed, trying next port ..." << std::endl;
}
}
throw std::runtime_error(
absl::StrCat("Could not start test HTTP server on any of these ports: ",
absl::StrJoin(ports, ", ")));
}();

// Run the server in its own thread. Wait for 100ms until the server is
// up (it should be up immediately).
//
// NOTE: It is important to *copy* the `httpServer` pointer into the thread.
// That way, whoever dies first (this thread or the `httpServerThread`), the
// pointer is still valid in the other thread.
std::jthread httpServerThread([httpServer]() { httpServer->run(); });
auto waitTimeUntilServerIsUp = 100ms;
std::this_thread::sleep_for(waitTimeUntilServerIsUp);
if (!httpServer->serverIsReady()) {
// Detach the server thread (the `run()` above never terminates), so that we
// can exit this test.
httpServerThread.detach();
throw std::runtime_error(absl::StrCat("HttpServer was not up after ",
waitTimeUntilServerIsUp.count(),
"ms, this should not happen"));
}
});
httpServer.runInOwnThread();

// Helper lambdas for testing GET and POST requests.
auto testGetRequest = [](HttpClient* httpClient, const std::string& target) {
Expand All @@ -97,22 +55,23 @@ TEST(HttpServer, HttpTest) {
// First session (checks whether client and server can communicate as they
// should).
{
HttpClient httpClient("localhost", std::to_string(httpServer->getPort()));
HttpClient httpClient("localhost", std::to_string(httpServer.getPort()));
testGetRequest(&httpClient, "target1");
testPostRequest(&httpClient, "target1", "body1");
}

// Second session (checks if everything is still fine with the server after we
// have communicated with it for one session).
{
HttpClient httpClient("localhost", std::to_string(httpServer->getPort()));
HttpClient httpClient("localhost", std::to_string(httpServer.getPort()));
testGetRequest(&httpClient, "target2");
testPostRequest(&httpClient, "target2", "body2");
}

// Third session (check that after shutting down, no more new connections are
// being accepted).
httpServer->shutDown();
ASSERT_THROW(HttpClient("localhost", std::to_string(httpServer->getPort())),
// httpServer->shutDown();
httpServer.shutDown();
ASSERT_THROW(HttpClient("localhost", std::to_string(httpServer.getPort())),
std::exception);
}
Loading

0 comments on commit e7d3263

Please sign in to comment.