Skip to content

Commit

Permalink
First working implementation of SERVICE
Browse files Browse the repository at this point in the history
Basic principle: Send the query inside the SERVICE clause to the remote
endpoint and turn the result into a VALUES clause, which is then
processed using (an improved version of) existing code.

1. Update `parsedQuery::Values` to hold a table of `TripleComponent`s
instead of `std::string`s as before. Modify the `SparqlQleverVisitor`
accordingly.

2. Update the `Values : Operation` class to add OOV entries to the
`LocalVocab` (any row containing an oov entry was ignored so far).
Update the JOIN operation to propagate the localVocab if only one of the
operands has one (which is a frequent use case).

3. Used the occasion to create a separate `LocalVocab` class with basic
functionality for adding words to a local vocabulary once.

4. Add a new class HttpClient which can open HTTP and HTTPS connections
to a given host, and which can then send GET or POST requests, receive
the result synchronously, and write it to a (potentially very large)
string. Uses Boost.Beast, just like our HttpServer.

5. Wrote a test for our HttpServer and HttpClient. However, the test so
far only tests the HTTP case because our HttpServer just does HTTP so
far. Also used the occasion to rename util/HttpServer to util/http
because httpServer was simply a misnomer. This change required very many
small changes (in includes and in the various CMakeLists.txt).
  • Loading branch information
Hannah Bast committed Nov 18, 2022
1 parent 77991dd commit db4774f
Show file tree
Hide file tree
Showing 19 changed files with 141 additions and 58 deletions.
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ add_library(engine
Union.cpp Union.h
MultiColumnJoin.cpp MultiColumnJoin.h
TransitivePath.cpp TransitivePath.h
Service.cpp Service.h
Values.cpp Values.h
Bind.cpp Bind.h
IdTable.h
Expand All @@ -39,5 +40,4 @@ add_library(engine
../util/Parameters.h RuntimeInformation.cpp CheckUsePatternTrick.cpp CheckUsePatternTrick.h
VariableToColumnMap.cpp)


target_link_libraries(engine index parser sparqlExpressions http SortPerformanceEstimator absl::flat_hash_set ${ICU_LIBRARIES} boost_iostreams)
2 changes: 2 additions & 0 deletions src/engine/CheckUsePatternTrick.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ bool isVariableContainedInGraphPatternOperation(
});
} else if constexpr (std::is_same_v<T, p::Values>) {
return ad_utility::contains(arg._inlineValues._variables, variable);
} else if constexpr (std::is_same_v<T, p::Service>) {
return check(arg.graphPattern_);
} else {
static_assert(std::is_same_v<T, p::TransPath>);
// The `TransPath` is set up later in the query planning, when this
Expand Down
4 changes: 4 additions & 0 deletions src/engine/QueryExecutionTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "engine/Join.h"
#include "engine/NeutralElementOperation.h"
#include "engine/OrderBy.h"
#include "engine/Service.h"
#include "engine/Sort.h"
#include "engine/TextOperationWithFilter.h"
#include "engine/TransitivePath.h"
Expand Down Expand Up @@ -612,6 +613,8 @@ void QueryExecutionTree::setOperation(std::shared_ptr<Op> operation) {
_type = DISTINCT;
} else if constexpr (std::is_same_v<Op, Values>) {
_type = VALUES;
} else if constexpr (std::is_same_v<Op, Service>) {
_type = SERVICE;
} else if constexpr (std::is_same_v<Op, TransitivePath>) {
_type = TRANSITIVE_PATH;
} else if constexpr (std::is_same_v<Op, OrderBy>) {
Expand Down Expand Up @@ -643,6 +646,7 @@ template void QueryExecutionTree::setOperation(std::shared_ptr<Bind>);
template void QueryExecutionTree::setOperation(std::shared_ptr<Sort>);
template void QueryExecutionTree::setOperation(std::shared_ptr<Distinct>);
template void QueryExecutionTree::setOperation(std::shared_ptr<Values>);
template void QueryExecutionTree::setOperation(std::shared_ptr<Service>);
template void QueryExecutionTree::setOperation(std::shared_ptr<TransitivePath>);
template void QueryExecutionTree::setOperation(std::shared_ptr<OrderBy>);
template void QueryExecutionTree::setOperation(std::shared_ptr<GroupBy>);
Expand Down
1 change: 1 addition & 0 deletions src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class QueryExecutionTree {
MULTICOLUMN_JOIN,
TRANSITIVE_PATH,
VALUES,
SERVICE,
BIND,
MINUS,
NEUTRAL_ELEMENT
Expand Down
5 changes: 4 additions & 1 deletion src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <engine/OptionalJoin.h>
#include <engine/OrderBy.h>
#include <engine/QueryPlanner.h>
#include <engine/Service.h>
#include <engine/Sort.h>
#include <engine/TextOperationWithFilter.h>
#include <engine/TextOperationWithoutFilter.h>
Expand Down Expand Up @@ -453,7 +454,9 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::optimize(
SubtreePlan valuesPlan =
makeSubtreePlan<Values>(_qec, arg._inlineValues);
joinCandidates(std::vector{std::move(valuesPlan)});

} else if constexpr (std::is_same_v<T, p::Service>) {
SubtreePlan servicePlan = makeSubtreePlan<Service>(_qec, arg);
joinCandidates(std::vector{std::move(servicePlan)});
} else if constexpr (std::is_same_v<T, p::Bind>) {
// The logic of the BIND operation is implemented in the joinCandidates
// lambda. Reason: BIND does not add a new join operation like for the
Expand Down
2 changes: 2 additions & 0 deletions src/engine/ResultTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <cassert>

#include "engine/LocalVocab.h"
#include "global/Id.h"
#include "global/ValueId.h"

// _____________________________________________________________________________
ResultTable::ResultTable(ad_utility::AllocatorWithLimit<Id> allocator)
Expand Down
1 change: 1 addition & 0 deletions src/engine/Sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ void Sort::computeResult(ResultTable* result) {
result->_resultTypes.insert(result->_resultTypes.end(),
subRes->_resultTypes.begin(),
subRes->_resultTypes.end());
// TODO: shouldn't this be moved?
result->_localVocab = subRes->_localVocab;
result->_idTable.insert(result->_idTable.end(), subRes->_idTable.begin(),
subRes->_idTable.end());
Expand Down
5 changes: 5 additions & 0 deletions src/parser/GraphPatternOperation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ void GraphPatternOperation::toString(std::ostringstream& os,
} else if constexpr (std::is_same_v<T, Values>) {
os << "VALUES (" << arg._inlineValues.variablesToString() << ") "
<< arg._inlineValues.valuesToString();
} else if constexpr (std::is_same_v<T, Service>) {
os << "SERVICE " << arg.serviceVarOrIri_ << " {";
// TODO: In other places, the interface is `os << ...asString(indent)`.
arg.graphPattern_.toString(os, indentation);
os << "}";
} else if constexpr (std::is_same_v<T, BasicGraphPattern>) {
for (size_t i = 0; i + 1 < arg._triples.size(); ++i) {
os << "\n";
Expand Down
14 changes: 13 additions & 1 deletion src/parser/GraphPatternOperation.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ class SparqlValues {
std::string valuesToString() const;
};

/// A `SERVICE` clause as produced by the parser. Besides the parsed graph
/// pattern, it stores the pieces of the original query text (prologue and
/// clause body) from which the query for the remote endpoint is built.
struct Service {
// The parsed group graph pattern of the service clause.
GraphPattern graphPattern_;
// The variable or IRI that names the service, stored as the raw text from
// the query (so this is not necessarily an IRI).
std::string serviceVarOrIri_;
// The prologue (BASE and PREFIX declarations) of the enclosing query, copied
// verbatim from the query string; empty if the query has no prologue.
std::string servicePrologue_;
// The body of the SPARQL query for the remote endpoint: the source text of
// the clause's group graph pattern, copied verbatim from the query string.
std::string serviceQueryBody_;
};

/// A `BasicGraphPattern` represents a consecutive block of triples.
struct BasicGraphPattern {
std::vector<SparqlTriple> _triples;
Expand Down Expand Up @@ -147,7 +159,7 @@ struct Bind {
// class actually becomes `using GraphPatternOperation = std::variant<...>`
using GraphPatternOperationVariant =
std::variant<Optional, Union, Subquery, TransPath, Bind, BasicGraphPattern,
Values, Minus, GroupGraphPattern>;
Values, Service, Minus, GroupGraphPattern>;
struct GraphPatternOperation
: public GraphPatternOperationVariant,
public VisitMixin<GraphPatternOperation, GraphPatternOperationVariant> {
Expand Down
1 change: 1 addition & 0 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,7 @@ void ParsedQuery::GraphPattern::recomputeIds(size_t* id_count) {
arg._id = (*id_count)++;
} else {
static_assert(std::is_same_v<T, parsedQuery::Subquery> ||
std::is_same_v<T, parsedQuery::Service> ||
std::is_same_v<T, parsedQuery::BasicGraphPattern> ||
std::is_same_v<T, parsedQuery::Bind>);
// subquery children have their own id space
Expand Down
30 changes: 25 additions & 5 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ SparqlExpressionPimpl Visitor::visitExpressionPimpl(auto* ctx,

// ____________________________________________________________________________________
ParsedQuery Visitor::visit(Parser::QueryContext* ctx) {
// Remember the full query string (once for the whole query).
fullQueryString_ = ctx->getStart()->getInputStream()->toString();

// The prologue (BASE and PREFIX declarations) only affects the internal
// state of the visitor.
visit(ctx->prologue());
Expand Down Expand Up @@ -435,15 +438,25 @@ GraphPatternOperation Visitor::visit(Parser::OptionalGraphPatternContext* ctx) {
}

// ____________________________________________________________________________________
parsedQuery::GraphPatternOperation Visitor::visit(
Parser::GraphGraphPatternContext* ctx) {
reportNotSupported(ctx, "Named Graphs (FROM, GRAPH) are");
parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) {
  // Parse a `SERVICE` clause. In addition to the parsed graph pattern, the
  // result keeps the raw text of the clause body and the query prologue.
  parsedQuery::Service service;
  service.serviceVarOrIri_ = ctx->varOrIri()->getText();

  // Character range of the group graph pattern within the full query string
  // (both indices are inclusive, hence the `+ 1` for the length).
  auto* pattern = ctx->groupGraphPattern();
  const size_t firstIndex = pattern->getStart()->getStartIndex();
  const size_t lastIndex = pattern->getStop()->getStopIndex();
  const size_t numChars = lastIndex - firstIndex + 1;

  service.graphPattern_ = visit(pattern);
  service.servicePrologue_ = prologueString_;
  service.serviceQueryBody_ = std::string{
      ad_utility::getUTF8Substring(fullQueryString_, firstIndex, numChars)};
  return service;
}

// ____________________________________________________________________________________
parsedQuery::GraphPatternOperation Visitor::visit(
Parser::ServiceGraphPatternContext* ctx) {
reportNotSupported(ctx, "Federated queries (SERVICE) are");
Parser::GraphGraphPatternContext* ctx) {
reportNotSupported(ctx, "Named Graphs (FROM, GRAPH) are");
}

// ____________________________________________________________________________________
Expand Down Expand Up @@ -570,6 +583,13 @@ string Visitor::visit(Parser::PnameNsContext* ctx) {
void Visitor::visit(Parser::PrologueContext* ctx) {
  // The declarations themselves only update the internal visitor state.
  visitVector(ctx->baseDecl());
  visitVector(ctx->prefixDecl());

  // Additionally store the prologue text verbatim, because a SERVICE clause
  // needs it later. Without both a start and a stop token there is nothing to
  // extract, and the stored prologue is left unchanged.
  const auto firstToken = ctx->getStart();
  const auto lastToken = ctx->getStop();
  if (!firstToken || !lastToken) {
    return;
  }
  const size_t begin = firstToken->getStartIndex();
  const size_t end = lastToken->getStopIndex();
  // Both indices are inclusive, hence the `+ 1` for the length.
  prologueString_ = std::string{
      ad_utility::getUTF8Substring(fullQueryString_, begin, end - begin + 1)};
}

// ____________________________________________________________________________________
Expand Down
5 changes: 4 additions & 1 deletion src/parser/sparqlParser/SparqlQleverVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ class SparqlQleverVisitor {
// currently parsing.
std::vector<std::vector<Variable>> visibleVariables_{{}};
PrefixMap prefixMap_{};
// The following two are needed for SERVICE:
std::string fullQueryString_;
std::string prologueString_;

public:
SparqlQleverVisitor() = default;
Expand Down Expand Up @@ -187,7 +190,7 @@ class SparqlQleverVisitor {
[[noreturn]] parsedQuery::GraphPatternOperation visit(
Parser::GraphGraphPatternContext* ctx);

[[noreturn]] parsedQuery::GraphPatternOperation visit(
[[nodiscard]] parsedQuery::Service visit(
Parser::ServiceGraphPatternContext* ctx);

[[nodiscard]] parsedQuery::GraphPatternOperation visit(
Expand Down
4 changes: 1 addition & 3 deletions src/util/BoostHelpers/AsyncWaitForFuture.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
#define QLEVER_ASYNCWAITFORFUTURE_H
// Inspired by https://gist.github.com/inetic/dc9081baf45ec4b60037

#include <future>

#include "util/http/beast.h"
#include <util/http/beast.h>

namespace ad_utility::asio_helpers {

Expand Down
2 changes: 1 addition & 1 deletion src/util/http/HttpClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ HttpClientImpl<StreamType>::~HttpClientImpl() noexcept(false) {
boost::system::error_code ec;
if constexpr (std::is_same_v<StreamType, beast::tcp_stream>) {
stream_->socket().shutdown(tcp::socket::shutdown_both, ec);
// `not_connected happens sometimes, so don't bother reporting it.
// `not_connected` happens sometimes, so don't bother reporting it.
if (ec && ec != beast::errc::not_connected) {
if (std::uncaught_exceptions() == 0) {
throw beast::system_error{ec};
Expand Down
17 changes: 17 additions & 0 deletions src/util/http/HttpServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#ifndef QLEVER_HTTPSERVER_H
#define QLEVER_HTTPSERVER_H

#include <util/http/beast.h>

#include <cstdlib>
#include <semaphore>

Expand Down Expand Up @@ -183,6 +185,21 @@ class HttpServer {
// TODO<joka921> is this even needed, to my understanding,
// nothing may happen in parallel within an HTTP session, but
// then again this does no harm.

// auto handleExceptionInSession = [](std::exception_ptr e) {
// try {
// if (e) {
// std::rethrow_exception(e);
// }
// } catch (const std::exception& error) {
// LOG(ERROR) << "LISTENER caught exception: " << error.what()
// << std::endl;
// if (std::string(error.what()).starts_with("EXIT")) {
// LOG(INFO) << "Exiting server loop ..." << std::endl;
// }
// }
// };

net::co_spawn(net::make_strand(_ioContext), session(std::move(socket)),
net::detached);
} catch (const boost::system::system_error& b) {
Expand Down
17 changes: 10 additions & 7 deletions src/util/http/HttpUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,23 @@ static constexpr char urlRegexString[] =
static constexpr auto urlRegex = ctll::fixed_string(urlRegexString);

// ____________________________________________________________________________
UrlComponents::UrlComponents(const std::string_view url) {
Url::Url(const std::string_view url) {
auto match = ctre::search<urlRegex>(url);
if (!match) {
throw std::runtime_error(
absl::StrCat("URL malformed, must match regex ", urlRegexString));
}
protocol =
protocol_ =
match.get<1>().to_string() == "http" ? Protocol::HTTP : Protocol::HTTPS;
host = match.get<2>().to_string();
port = match.get<4>().to_string();
if (port.empty()) {
port = protocol == Protocol::HTTP ? "80" : "443";
host_ = match.get<2>().to_string();
port_ = match.get<4>().to_string();
if (port_.empty()) {
port_ = protocol_ == Protocol::HTTP ? "80" : "443";
}
target_ = match.get<5>().to_string();
if (target_.empty()) {
target_ = "/";
}
target = match.get<5>().to_string();
}

} // namespace ad_utility::httpUtils
46 changes: 28 additions & 18 deletions src/util/http/HttpUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,32 +32,42 @@ using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp>
namespace streams = ad_utility::streams;
using ad_utility::httpUtils::httpStreams::streamable_body;

/// The components of a URL. For example, the components of the URL
/// Simple URL class that provides "getters" for the various URL components
/// (some of the components are implicit, such as the port, so these are not
/// really getters). For example, the components of the URL
/// https://qlever.cs.uni-freiburg.de/api/wikidata are:
///
/// protocol: HTTPS
/// host: qlever.cs.uni-freiburg.de
/// port: 443 (implicit)
/// target: /api/wikidata .
///
/// NOTE: `host` and `target` could be `std::string_view` because they are parts
/// of the given URL. However, `port` can be implicit, so we need a
/// `std::string` here (and it's not an `int` because the Beast functions ask
/// for the port as a string). Since URLs are short and we do not handle large
/// numbers of URLs, the overhead of the string copies are negligible.
struct UrlComponents {
class Url {
public:
enum class Protocol { HTTP, HTTPS };

private:
Protocol protocol_;
std::string host_;
std::string port_;
std::string target_;

public:
// Construct from given URL.
UrlComponents(const std::string_view url);
// Members.
enum Protocol { HTTP, HTTPS } protocol;
std::string host;
std::string port;
std::string target;
// For testing.
friend std::ostream& operator<<(std::ostream& os, const UrlComponents& uc) {
return os << "UrlComponents("
<< (uc.protocol == Protocol::HTTP ? "http" : "https") << ", "
<< uc.host << ", " << uc.port << ", " << uc.target << ")";
Url(const std::string_view url);
// The protocol: one of Protocol::HTTP or Protocol::HTTPS.
Protocol protocol() const { return protocol_; }
// The host; this is always a substring of the given URL.
std::string host() const { return host_; }
// The port; inferred from the protocol if not specified explicitly (80 for
// HTTP, 443 for HTTPS).
std::string port() const { return port_; }
// The target; this is a substring of the given URL, except when it's empty in
// the URL, then it's "/".
std::string target() const { return target_; }
// The protocol as string.
std::string protocolAsString() const {
return protocol_ == Protocol::HTTP ? "http" : "https";
}
};

Expand Down
Loading

0 comments on commit db4774f

Please sign in to comment.