Skip to content

Commit

Permalink
refactor(fuzzer): Move setupReferenceQueryRunner to FuzzerUtil (faceb…
Browse files Browse the repository at this point in the history
…ookincubator#12232)

Summary:
Reuse the common code across all fuzzers.

Pull Request resolved: facebookincubator#12232

Reviewed By: pedroerp

Differential Revision: D69079394

Pulled By: kagamiori

fbshipit-source-id: 9a3a4197f98059a29c46f51cecb94c290a9d6d70
  • Loading branch information
aditi-pandit authored and facebook-github-bot committed Feb 4, 2025
1 parent f73b7b1 commit a785f7e
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 91 deletions.
30 changes: 0 additions & 30 deletions velox/exec/fuzzer/AggregationFuzzerBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
#include "velox/common/base/VeloxException.h"
#include "velox/connectors/hive/HiveConnectorSplit.h"
#include "velox/dwio/dwrf/writer/Writer.h"
#include "velox/exec/fuzzer/DuckQueryRunner.h"
#include "velox/exec/fuzzer/PrestoQueryRunner.h"
#include "velox/exec/tests/utils/TempDirectoryPath.h"
#include "velox/expression/SignatureBinder.h"
#include "velox/expression/fuzzer/ArgumentTypeFuzzer.h"
Expand Down Expand Up @@ -813,34 +811,6 @@ void persistReproInfo(
}
}

// Creates the ReferenceQueryRunner selected by 'prestoUrl'.
//
// If 'prestoUrl' is empty, returns a DuckQueryRunner created on
// 'aggregatePool' with the aggregate functions listed below disabled.
// Otherwise, returns a PrestoQueryRunner created from 'aggregatePool',
// 'prestoUrl', 'runnerName' and 'reqTimeoutMs' (converted to
// std::chrono::milliseconds). In both cases the chosen reference DB is logged
// at INFO level.
std::unique_ptr<ReferenceQueryRunner> setupReferenceQueryRunner(
    memory::MemoryPool* aggregatePool,
    const std::string& prestoUrl,
    const std::string& runnerName,
    const uint32_t& reqTimeoutMs) {
  if (prestoUrl.empty()) {
    auto duckQueryRunner = std::make_unique<DuckQueryRunner>(aggregatePool);
    duckQueryRunner->disableAggregateFunctions({
        "skewness",
        // DuckDB results on constant inputs are incorrect. Should be NaN,
        // but DuckDB returns some random value.
        "kurtosis",
        "entropy",
        // Regr_count result in DuckDB is incorrect when the input data is null.
        "regr_count",
    });
    LOG(INFO) << "Using DuckDB as the reference DB.";
    return duckQueryRunner;
  }

  // The message must be logged before the return statement; a LOG placed
  // after it is unreachable and would never be emitted.
  LOG(INFO) << "Using Presto as the reference DB.";
  return std::make_unique<PrestoQueryRunner>(
      aggregatePool,
      prestoUrl,
      runnerName,
      static_cast<std::chrono::milliseconds>(reqTimeoutMs));
}

std::vector<std::string> retrieveWindowFunctionName(
const core::PlanNodePtr& node) {
auto windowNode = std::dynamic_pointer_cast<const core::WindowNode>(node);
Expand Down
9 changes: 0 additions & 9 deletions velox/exec/fuzzer/AggregationFuzzerBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,15 +321,6 @@ void persistReproInfo(
const std::vector<AggregationFuzzerBase::PlanWithSplits>& plans,
const std::string& basePath);

// Returns a PrestoQueryRunner instance if 'prestoUrl' is non-empty. Otherwise,
// returns a DuckQueryRunner instance with the aggregate functions whose DuckDB
// results are not usable as a reference disabled.
//
// 'aggregatePool' is passed to whichever runner is created. 'runnerName' and
// 'reqTimeoutMs' (a timeout in milliseconds) are only used when creating the
// PrestoQueryRunner.
std::unique_ptr<ReferenceQueryRunner> setupReferenceQueryRunner(
memory::MemoryPool* aggregatePool,
const std::string& prestoUrl,
const std::string& runnerName,
const uint32_t& reqTimeoutMs);

// Returns the function name used in a WindowNode. The input `node` should be a
// pointer to a WindowNode.
std::vector<std::string> retrieveWindowFunctionName(
Expand Down
30 changes: 30 additions & 0 deletions velox/exec/fuzzer/FuzzerUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include "velox/connectors/hive/HiveConnectorSplit.h"
#include "velox/dwio/catalog/fbhive/FileUtils.h"
#include "velox/dwio/dwrf/writer/Writer.h"
#include "velox/exec/fuzzer/DuckQueryRunner.h"
#include "velox/exec/fuzzer/PrestoQueryRunner.h"
#include "velox/expression/SignatureBinder.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"

Expand Down Expand Up @@ -378,6 +380,34 @@ void registerHiveConnector(
connector::registerConnector(hiveConnector);
}

// Creates the ReferenceQueryRunner selected by 'prestoUrl'.
//
// If 'prestoUrl' is empty, returns a DuckQueryRunner created on
// 'aggregatePool' with the aggregate functions listed below disabled.
// Otherwise, returns a PrestoQueryRunner created from 'aggregatePool',
// 'prestoUrl', 'runnerName' and 'reqTimeoutMs' (converted to
// std::chrono::milliseconds). In both cases the chosen reference DB is logged
// at INFO level.
std::unique_ptr<ReferenceQueryRunner> setupReferenceQueryRunner(
    memory::MemoryPool* aggregatePool,
    const std::string& prestoUrl,
    const std::string& runnerName,
    const uint32_t& reqTimeoutMs) {
  if (prestoUrl.empty()) {
    auto duckQueryRunner = std::make_unique<DuckQueryRunner>(aggregatePool);
    duckQueryRunner->disableAggregateFunctions({
        "skewness",
        // DuckDB results on constant inputs are incorrect. Should be NaN,
        // but DuckDB returns some random value.
        "kurtosis",
        "entropy",
        // Regr_count result in DuckDB is incorrect when the input data is null.
        "regr_count",
    });
    LOG(INFO) << "Using DuckDB as the reference DB.";
    return duckQueryRunner;
  }

  // The message must be logged before the return statement; a LOG placed
  // after it is unreachable and would never be emitted.
  LOG(INFO) << "Using Presto as the reference DB.";
  return std::make_unique<PrestoQueryRunner>(
      aggregatePool,
      prestoUrl,
      runnerName,
      static_cast<std::chrono::milliseconds>(reqTimeoutMs));
}

std::pair<std::optional<MaterializedRowMultiset>, ReferenceQueryErrorCode>
computeReferenceResults(
const core::PlanNodePtr& plan,
Expand Down
9 changes: 9 additions & 0 deletions velox/exec/fuzzer/FuzzerUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,15 @@ void setupMemory(
void registerHiveConnector(
const std::unordered_map<std::string, std::string>& hiveConfigs);

// Returns a PrestoQueryRunner instance if 'prestoUrl' is non-empty. Otherwise,
// returns a DuckQueryRunner instance with the aggregate functions whose DuckDB
// results are not usable as a reference (skewness, kurtosis, entropy and
// regr_count) disabled.
//
// 'aggregatePool' is passed to whichever runner is created. 'runnerName' and
// 'reqTimeoutMs' (a timeout in milliseconds) are only used when creating the
// PrestoQueryRunner.
std::unique_ptr<ReferenceQueryRunner> setupReferenceQueryRunner(
memory::MemoryPool* aggregatePool,
const std::string& prestoUrl,
const std::string& runnerName,
const uint32_t& reqTimeoutMs);

// Converts 'plan' into an SQL query and runs in the reference DB.
// Result is returned as a MaterializedRowMultiset with the
// ReferenceQueryErrorCode::kSuccess if successful, or an std::nullopt with a
Expand Down
27 changes: 1 addition & 26 deletions velox/exec/fuzzer/RowNumberFuzzerRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,11 @@
#include <folly/init/Init.h>
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <unordered_set>
#include "velox/common/file/FileSystems.h"
#include "velox/common/memory/SharedArbitrator.h"
#include "velox/connectors/hive/HiveConnector.h"
#include "velox/exec/MemoryReclaimer.h"
#include "velox/exec/fuzzer/DuckQueryRunner.h"
#include "velox/exec/fuzzer/FuzzerUtil.h"
#include "velox/exec/fuzzer/PrestoQueryRunner.h"
#include "velox/exec/fuzzer/ReferenceQueryRunner.h"
#include "velox/exec/fuzzer/RowNumberFuzzer.h"
#include "velox/serializers/PrestoSerializer.h"
Expand Down Expand Up @@ -86,28 +83,6 @@ DEFINE_int64(arbitrator_capacity, 6L << 30, "Arbitrator capacity in bytes.");

using namespace facebook::velox::exec;

namespace {
// Picks the reference DB for the fuzzer: Presto when 'prestoUrl' is set,
// DuckDB otherwise. The selection is logged at INFO level.
std::unique_ptr<test::ReferenceQueryRunner> setupReferenceQueryRunner(
    facebook::velox::memory::MemoryPool* aggregatePool,
    const std::string& prestoUrl,
    const std::string& runnerName,
    const uint32_t& reqTimeoutMs) {
  if (!prestoUrl.empty()) {
    LOG(INFO) << "Using Presto as the reference DB.";
    return std::make_unique<test::PrestoQueryRunner>(
        aggregatePool,
        prestoUrl,
        runnerName,
        static_cast<std::chrono::milliseconds>(reqTimeoutMs));
  }

  // No Presto endpoint was given, so fall back to DuckDB.
  std::unique_ptr<test::ReferenceQueryRunner> runner =
      std::make_unique<test::DuckQueryRunner>(aggregatePool);
  LOG(INFO) << "Using DuckDB as the reference DB.";
  return runner;
}
} // namespace

int main(int argc, char** argv) {
// Calls common init functions in the necessary order, initializing
// singletons, installing proper signal handlers for better debugging
Expand All @@ -116,7 +91,7 @@ int main(int argc, char** argv) {
test::setupMemory(FLAGS_allocator_capacity, FLAGS_arbitrator_capacity);
std::shared_ptr<facebook::velox::memory::MemoryPool> rootPool{
facebook::velox::memory::memoryManager()->addRootPool()};
auto referenceQueryRunner = setupReferenceQueryRunner(
auto referenceQueryRunner = test::setupReferenceQueryRunner(
rootPool.get(),
FLAGS_presto_url,
"row_number_fuzzer",
Expand Down
27 changes: 1 addition & 26 deletions velox/exec/tests/JoinFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,9 @@
#include <folly/init/Init.h>
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <unordered_set>

#include "velox/exec/fuzzer/DuckQueryRunner.h"
#include "velox/exec/fuzzer/FuzzerUtil.h"
#include "velox/exec/fuzzer/JoinFuzzerRunner.h"
#include "velox/exec/fuzzer/PrestoQueryRunner.h"
#include "velox/exec/fuzzer/ReferenceQueryRunner.h"

DEFINE_int64(
Expand Down Expand Up @@ -50,28 +47,6 @@ DEFINE_int64(arbitrator_capacity, 6L << 30, "Arbitrator capacity in bytes.");

using namespace facebook::velox::exec;

namespace {
// Picks the reference DB for the fuzzer: Presto when 'prestoUrl' is set,
// DuckDB otherwise. The selection is logged at INFO level.
std::unique_ptr<test::ReferenceQueryRunner> setupReferenceQueryRunner(
    facebook::velox::memory::MemoryPool* aggregatePool,
    const std::string& prestoUrl,
    const std::string& runnerName,
    const uint32_t& reqTimeoutMs) {
  if (!prestoUrl.empty()) {
    LOG(INFO) << "Using Presto as the reference DB.";
    return std::make_unique<test::PrestoQueryRunner>(
        aggregatePool,
        prestoUrl,
        runnerName,
        static_cast<std::chrono::milliseconds>(reqTimeoutMs));
  }

  // No Presto endpoint was given, so fall back to DuckDB.
  std::unique_ptr<test::ReferenceQueryRunner> runner =
      std::make_unique<test::DuckQueryRunner>(aggregatePool);
  LOG(INFO) << "Using DuckDB as the reference DB.";
  return runner;
}
} // namespace

int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);

Expand All @@ -82,7 +57,7 @@ int main(int argc, char** argv) {
test::setupMemory(FLAGS_allocator_capacity, FLAGS_arbitrator_capacity);
std::shared_ptr<facebook::velox::memory::MemoryPool> rootPool{
facebook::velox::memory::memoryManager()->addRootPool()};
auto referenceQueryRunner = setupReferenceQueryRunner(
auto referenceQueryRunner = test::setupReferenceQueryRunner(
rootPool.get(), FLAGS_presto_url, "join_fuzzer", FLAGS_req_timeout_ms);
const size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed;
return test::JoinFuzzerRunner::run(
Expand Down

0 comments on commit a785f7e

Please sign in to comment.