Skip to content

Commit

Permalink
feat: ETLng task manager (#1843)
Browse files Browse the repository at this point in the history
  • Loading branch information
godexsoft authored Jan 29, 2025
1 parent 3e200d8 commit 73f375f
Show file tree
Hide file tree
Showing 19 changed files with 769 additions and 29 deletions.
2 changes: 1 addition & 1 deletion src/etlng/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ add_library(clio_etlng)

target_sources(
clio_etlng PRIVATE impl/AmendmentBlockHandler.cpp impl/AsyncGrpcCall.cpp impl/Extraction.cpp impl/GrpcSource.cpp
impl/Loading.cpp
impl/Loading.cpp impl/TaskManager.cpp
)

target_link_libraries(clio_etlng PUBLIC clio_data)
49 changes: 49 additions & 0 deletions src/etlng/LedgerPublisherInterface.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//------------------------------------------------------------------------------
/*
This file is part of clio: https://github.com/XRPLF/clio
Copyright (c) 2025, the clio developers.
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================

#pragma once

#include <chrono>
#include <cstdint>
#include <optional>

namespace etlng {

/**
* @brief The interface of a ledger publisher
*
* Implementations publish a ledger identified by its sequence number, optionally retrying.
*/
struct LedgerPublisherInterface {
virtual ~LedgerPublisherInterface() = default;

/**
* @brief Publish the ledger by its sequence number
*
* @note The default value of `attemptsDelay` is a default argument on a virtual function; in C++ such defaults
* are taken from the static type used at the call site, not from the overrider.
*
* @param seq The sequence number of the ledger
* @param maxAttempts The maximum number of attempts to publish the ledger; no limit if nullopt
* @param attemptsDelay The delay between attempts; defaults to one second
*/
virtual void
publish(
uint32_t seq,
std::optional<uint32_t> maxAttempts,
std::chrono::steady_clock::duration attemptsDelay = std::chrono::seconds{1}
) = 0;
};

} // namespace etlng
143 changes: 143 additions & 0 deletions src/etlng/impl/TaskManager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
//------------------------------------------------------------------------------
/*
This file is part of clio: https://github.com/XRPLF/clio
Copyright (c) 2025, the clio developers.
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================

#include "etlng/impl/TaskManager.hpp"

#include "etlng/ExtractorInterface.hpp"
#include "etlng/LoaderInterface.hpp"
#include "etlng/Models.hpp"
#include "etlng/SchedulerInterface.hpp"
#include "util/async/AnyExecutionContext.hpp"
#include "util/async/AnyOperation.hpp"
#include "util/async/AnyStrand.hpp"
#include "util/log/Logger.hpp"

#include <xrpl/protocol/TxFormats.h>

#include <chrono>
#include <cstddef>
#include <functional>
#include <ranges>
#include <thread>
#include <utility>
#include <vector>

namespace etlng::impl {

// Construct a task manager.
//
// Takes ownership of the execution context by moving it into ctx_. The scheduler, extractor and loader are
// stored as std::reference_wrapper, i.e. they are not owned: the referenced objects have to stay alive for as
// long as this TaskManager uses them.
// No tasks are started here; that happens in run().
TaskManager::TaskManager(
util::async::AnyExecutionContext&& ctx,
std::reference_wrapper<SchedulerInterface> scheduler,
std::reference_wrapper<ExtractorInterface> extractor,
std::reference_wrapper<LoaderInterface> loader
)
: ctx_(std::move(ctx)), schedulers_(scheduler), extractor_(extractor), loader_(loader)
{
}

// Destroy the task manager.
//
// Delegates to stop(), which aborts every spawned extractor/loader operation and then waits for all of them.
TaskManager::~TaskManager()
{
stop();
}

/**
 * @brief Spawn the requested number of extractor and loader tasks and block until all of them have finished
 *
 * Extractors are all started on one shared scheduling strand; loaders are started directly on the execution
 * context. Both sides communicate through a bounded priority queue that lives on this function's stack.
 *
 * @param settings The number of extractor and loader tasks to spawn
 */
void
TaskManager::run(Settings settings)
{
    static constexpr auto kQUEUE_SIZE_LIMIT = 2048uz;

    auto schedulingStrand = ctx_.makeStrand();

    // The spawned tasks capture this queue by reference, so it has to outlive them;
    // the wait() call at the end of this function keeps it alive until they are done.
    PriorityQueue queue(ctx_.makeStrand(), kQUEUE_SIZE_LIMIT);

    LOG(log_.debug()) << "Starting task manager...\n";

    extractors_.reserve(settings.numExtractors);
    for (auto spawned = 0uz; spawned < settings.numExtractors; ++spawned)
        extractors_.push_back(spawnExtractor(schedulingStrand, queue));

    loaders_.reserve(settings.numLoaders);
    for (auto spawned = 0uz; spawned < settings.numLoaders; ++spawned)
        loaders_.push_back(spawnLoader(queue));

    wait();
    LOG(log_.debug()) << "All finished in task manager..\n";
}

/**
 * @brief Start one extraction task on the given strand
 *
 * The task keeps asking the scheduler for the next task, extracts the corresponding ledger (with diff) and
 * pushes the result into the queue, until a stop is requested or an extraction yields no data.
 *
 * NOTE(review): every extractor is submitted to the same strand handed in by run(); if AnyStrand serializes
 * the operations submitted to it, extractors would not make progress concurrently - confirm this is intended.
 *
 * @param strand The strand to execute the extraction task on
 * @param queue The queue to push extracted data into; captured by reference, must outlive the operation
 * @return The handle of the spawned operation
 */
util::async::AnyOperation<void>
TaskManager::spawnExtractor(util::async::AnyStrand& strand, PriorityQueue& queue)
{
    // TODO: these values may be extracted to config later and/or need to be fine-tuned on a realistic system
    static constexpr auto kDELAY_BETWEEN_ATTEMPTS = std::chrono::milliseconds{100u};
    static constexpr auto kDELAY_BETWEEN_ENQUEUE_ATTEMPTS = std::chrono::milliseconds{1u};

    return strand.execute([this, &queue](auto stopRequested) {
        while (not stopRequested) {
            auto nextTask = schedulers_.get().next();
            if (not nextTask.has_value()) {
                // Nothing scheduled right now; back off before asking again
                // TODO (https://github.com/XRPLF/clio/issues/1852)
                std::this_thread::sleep_for(kDELAY_BETWEEN_ATTEMPTS);
                continue;
            }

            auto extracted = extractor_.get().extractLedgerWithDiff(nextTask->seq);
            if (not extracted.has_value()) {
                // TODO: how do we signal to the loaders that it's time to shutdown? some special task?
                break;  // TODO: handle server shutdown or other node took over ETL
            }

            LOG(log_.debug()) << "Adding data after extracting diff";

            // Keep retrying until the queue accepts the batch or a stop is requested
            while (not queue.enqueue(*extracted)) {
                // TODO (https://github.com/XRPLF/clio/issues/1852)
                std::this_thread::sleep_for(kDELAY_BETWEEN_ENQUEUE_ATTEMPTS);

                if (stopRequested)
                    break;
            }
        }
    });
}

/**
 * @brief Start one loading task on the execution context
 *
 * The task repeatedly takes data off the queue and hands it to the loader until a stop is requested.
 * When the queue has nothing to offer the task backs off for a short delay instead of polling in a tight
 * loop, mirroring the polling delays used on the extraction side.
 *
 * @param queue The queue to take extracted data from; captured by reference, must outlive the operation
 * @return The handle of the spawned operation
 */
util::async::AnyOperation<void>
TaskManager::spawnLoader(PriorityQueue& queue)
{
    // TODO: this value may be extracted to config later and/or need to be fine-tuned on a realistic system
    static constexpr auto kDELAY_BETWEEN_DEQUEUE_ATTEMPTS = std::chrono::milliseconds{1u};

    return ctx_.execute([this, &queue](auto stopRequested) {
        while (not stopRequested) {
            // TODO (https://github.com/XRPLF/clio/issues/66): does not tell the loader whether it's out of order or not
            if (auto data = queue.dequeue(); data.has_value()) {
                loader_.get().load(*data);
            } else {
                // Nothing to load yet: sleep briefly rather than spinning on dequeue()
                // TODO (https://github.com/XRPLF/clio/issues/1852)
                std::this_thread::sleep_for(kDELAY_BETWEEN_DEQUEUE_ATTEMPTS);
            }
        }
    });
}

void
TaskManager::wait()
{
for (auto& extractor : extractors_)
extractor.wait();
for (auto& loader : loaders_)
loader.wait();
}

void
TaskManager::stop()
{
for (auto& extractor : extractors_)
extractor.abort();
for (auto& loader : loaders_)
loader.abort();

wait();
}

} // namespace etlng::impl
94 changes: 94 additions & 0 deletions src/etlng/impl/TaskManager.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//------------------------------------------------------------------------------
/*
This file is part of clio: https://github.com/XRPLF/clio
Copyright (c) 2025, the clio developers.
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================

#pragma once

#include "etlng/ExtractorInterface.hpp"
#include "etlng/LoaderInterface.hpp"
#include "etlng/Models.hpp"
#include "etlng/SchedulerInterface.hpp"
#include "util/StrandedPriorityQueue.hpp"
#include "util/async/AnyExecutionContext.hpp"
#include "util/async/AnyOperation.hpp"
#include "util/async/AnyStrand.hpp"
#include "util/log/Logger.hpp"

#include <xrpl/protocol/TxFormats.h>

#include <cstddef>
#include <functional>
#include <vector>

namespace etlng::impl {

/**
* @brief Runs and supervises the extraction and loading tasks of ETLng
*
* Extractor tasks obtain work from the scheduler, extract the ledger data and push it into a priority queue;
* loader tasks take data off that queue and pass it to the loader.
* The scheduler, extractor and loader are held by reference and are not owned by this class.
*/
class TaskManager {
// Execution context used to create strands and to run the loader tasks
util::async::AnyExecutionContext ctx_;
// Non-owning references to the collaborators; the referenced objects must outlive this object
std::reference_wrapper<SchedulerInterface> schedulers_;
std::reference_wrapper<ExtractorInterface> extractor_;
std::reference_wrapper<LoaderInterface> loader_;

// Handles of the operations spawned by run(); used by wait() and stop()
std::vector<util::async::AnyOperation<void>> extractors_;
std::vector<util::async::AnyOperation<void>> loaders_;

util::Logger log_{"ETL"};

/**
* @brief Comparator for the queue: true when lhs has a greater sequence number than rhs
*/
struct ReverseOrderComparator {
[[nodiscard]] bool
operator()(model::LedgerData const& lhs, model::LedgerData const& rhs) const noexcept
{
return lhs.seq > rhs.seq;
}
};

public:
/**
* @brief Settings for a single run() of the task manager
*/
struct Settings {
size_t numExtractors; /**< number of extraction tasks */
size_t numLoaders; /**< number of loading tasks */
};

// reverse order loading is needed (i.e. start with oldest seq in forward fill buffer)
using PriorityQueue = util::StrandedPriorityQueue<model::LedgerData, ReverseOrderComparator>;

/**
* @brief Construct a task manager; no tasks are started until run() is called
*
* @param ctx The execution context; moved into the task manager
* @param scheduler The scheduler that hands out tasks to extractors
* @param extractor The extractor used to obtain ledger data
* @param loader The loader that receives extracted data
*/
TaskManager(
util::async::AnyExecutionContext&& ctx,
std::reference_wrapper<SchedulerInterface> scheduler,
std::reference_wrapper<ExtractorInterface> extractor,
std::reference_wrapper<LoaderInterface> loader
);

/**
* @brief Stops all spawned tasks (see stop()) before destruction
*/
~TaskManager();

/**
* @brief Spawn the extractor and loader tasks and block until all of them have finished
*
* @param settings The number of extractor and loader tasks to spawn
*/
void
run(Settings settings);

/**
* @brief Abort all spawned tasks and block until they have completed
*/
void
stop();

private:
/**
* @brief Block until all spawned extractor and loader operations have completed
*/
void
wait();

/**
* @brief Start one extraction task on the given strand
*
* @param strand The strand to execute the task on
* @param queue The queue that receives extracted data
* @return The handle of the spawned operation
*/
[[nodiscard]] util::async::AnyOperation<void>
spawnExtractor(util::async::AnyStrand& strand, PriorityQueue& queue);

/**
* @brief Start one loading task on the execution context
*
* @param queue The queue to take extracted data from
* @return The handle of the spawned operation
*/
[[nodiscard]] util::async::AnyOperation<void>
spawnLoader(PriorityQueue& queue);
};

} // namespace etlng::impl
8 changes: 4 additions & 4 deletions src/feed/SubscriptionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ void
SubscriptionManager::pubBookChanges(
ripple::LedgerHeader const& lgrInfo,
std::vector<data::TransactionAndMetadata> const& transactions
) const
)
{
bookChangesFeed_.pub(lgrInfo, transactions);
}
Expand Down Expand Up @@ -111,7 +111,7 @@ SubscriptionManager::pubLedger(
ripple::Fees const& fees,
std::string const& ledgerRange,
std::uint32_t const txnCount
) const
)
{
ledgerFeed_.pub(lgrInfo, fees, ledgerRange, txnCount);
}
Expand All @@ -129,7 +129,7 @@ SubscriptionManager::unsubManifest(SubscriberSharedPtr const& subscriber)
}

void
SubscriptionManager::forwardManifest(boost::json::object const& manifestJson) const
SubscriptionManager::forwardManifest(boost::json::object const& manifestJson)
{
manifestFeed_.pub(manifestJson);
}
Expand All @@ -147,7 +147,7 @@ SubscriptionManager::unsubValidation(SubscriberSharedPtr const& subscriber)
}

void
SubscriptionManager::forwardValidation(boost::json::object const& validationJson) const
SubscriptionManager::forwardValidation(boost::json::object const& validationJson)
{
validationsFeed_.pub(validationJson);
}
Expand Down
8 changes: 4 additions & 4 deletions src/feed/SubscriptionManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class SubscriptionManager : public SubscriptionManagerInterface {
*/
void
pubBookChanges(ripple::LedgerHeader const& lgrInfo, std::vector<data::TransactionAndMetadata> const& transactions)
const final;
final;

/**
* @brief Subscribe to the proposed transactions feed.
Expand Down Expand Up @@ -218,7 +218,7 @@ class SubscriptionManager : public SubscriptionManagerInterface {
ripple::Fees const& fees,
std::string const& ledgerRange,
std::uint32_t txnCount
) const final;
) final;

/**
* @brief Subscribe to the manifest feed.
Expand All @@ -239,7 +239,7 @@ class SubscriptionManager : public SubscriptionManagerInterface {
* @param manifestJson The manifest json to forward.
*/
void
forwardManifest(boost::json::object const& manifestJson) const final;
forwardManifest(boost::json::object const& manifestJson) final;

/**
* @brief Subscribe to the validation feed.
Expand All @@ -260,7 +260,7 @@ class SubscriptionManager : public SubscriptionManagerInterface {
* @param validationJson The validation feed json to forward.
*/
void
forwardValidation(boost::json::object const& validationJson) const final;
forwardValidation(boost::json::object const& validationJson) final;

/**
* @brief Subscribe to the transactions feed.
Expand Down
Loading

0 comments on commit 73f375f

Please sign in to comment.