diff --git a/velox/connectors/tpch/tests/TpchConnectorTest.cpp b/velox/connectors/tpch/tests/TpchConnectorTest.cpp index a178880dfe510..c33f07b6cb06c 100644 --- a/velox/connectors/tpch/tests/TpchConnectorTest.cpp +++ b/velox/connectors/tpch/tests/TpchConnectorTest.cpp @@ -50,31 +50,15 @@ class TpchConnectorTest : public exec::test::OperatorTestBase { return exec::Split(std::make_shared(kTpchConnectorId)); } - // Helper function to create 1:1 assignments maps based on output type. - auto defaultAssignments(const RowTypePtr& outputType) const { - std::unordered_map> - assignmentsMap; - - for (const auto& columnName : outputType->names()) { - assignmentsMap.emplace( - columnName, std::make_shared(columnName)); - } - return assignmentsMap; - } - void runScaleFactorTest(size_t scaleFactor); }; // Simple scan of first 5 rows of "nation". TEST_F(TpchConnectorTest, simple) { - auto outputType = - ROW({"n_nationkey", "n_name", "n_regionkey", "n_comment"}, - {BIGINT(), VARCHAR(), BIGINT(), VARCHAR()}); auto plan = PlanBuilder() .tableScan( - outputType, - std::make_shared(Table::TBL_NATION), - defaultAssignments(outputType)) + Table::TBL_NATION, + {"n_nationkey", "n_name", "n_regionkey", "n_comment"}) .limit(0, 5, false) .planNode(); @@ -106,13 +90,7 @@ TEST_F(TpchConnectorTest, simple) { // Extract single column from "nation". TEST_F(TpchConnectorTest, singleColumn) { - auto outputType = ROW({"n_name"}, {VARCHAR()}); - auto plan = PlanBuilder() - .tableScan( - outputType, - std::make_shared(Table::TBL_NATION), - defaultAssignments(outputType)) - .planNode(); + auto plan = PlanBuilder().tableScan(Table::TBL_NATION, {"n_name"}).planNode(); auto output = getResults(plan, {makeTpchSplit()}); auto expected = makeRowVector({makeFlatVector({ @@ -183,15 +161,13 @@ TEST_F(TpchConnectorTest, simpleAggregation) { } TEST_F(TpchConnectorTest, unknownColumn) { - auto outputType = ROW({"does_not_exist"}, {VARCHAR()}); - auto plan = PlanBuilder() - .tableScan( - outputType, - std::make_shared(Table::TBL_NATION), - defaultAssignments(outputType)) - .planNode(); - - EXPECT_THROW(getResults(plan, {makeTpchSplit()}), VeloxRuntimeError); + EXPECT_THROW( + { + PlanBuilder() + .tableScan(Table::TBL_NATION, {"does_not_exist"}) + .planNode(); + }, + VeloxUserError); } } // namespace diff --git a/velox/exec/tests/utils/CMakeLists.txt b/velox/exec/tests/utils/CMakeLists.txt index b72d0364d939a..115707921a577 100644 --- a/velox/exec/tests/utils/CMakeLists.txt +++ b/velox/exec/tests/utils/CMakeLists.txt @@ -36,5 +36,6 @@ target_link_libraries( velox_dwio_dwrf_writer velox_dwrf_test_utils velox_hive_connector + velox_tpch_connector velox_presto_serializer velox_functions_prestosql) diff --git a/velox/exec/tests/utils/PlanBuilder.cpp b/velox/exec/tests/utils/PlanBuilder.cpp index cc3b19711b97c..3657ee6d19951 100644 --- a/velox/exec/tests/utils/PlanBuilder.cpp +++ b/velox/exec/tests/utils/PlanBuilder.cpp @@ -17,6 +17,7 @@ #include "velox/exec/tests/utils/PlanBuilder.h" #include "velox/common/memory/Memory.h" #include "velox/connectors/hive/HiveConnector.h" +#include "velox/connectors/tpch/TpchConnector.h" #include "velox/duckdb/conversion/DuckParser.h" #include "velox/exec/Aggregate.h" #include "velox/exec/HashPartitionFunction.h" @@ -424,6 +425,30 @@ PlanBuilder& PlanBuilder::tableScan( return *this; } +PlanBuilder& PlanBuilder::tableScan( + tpch::Table table, + std::vector&& columnNames, + size_t scaleFactor) { + std::unordered_map> + assignmentsMap; + std::vector outputTypes; + + assignmentsMap.reserve(columnNames.size()); + outputTypes.reserve(columnNames.size()); + + for (const auto& columnName : columnNames) { + assignmentsMap.emplace( + columnName, + std::make_shared(columnName)); + outputTypes.emplace_back(resolveTpchColumn(table, columnName)); + } + auto rowType = ROW(std::move(columnNames), std::move(outputTypes)); + return tableScan( + rowType, + std::make_shared(table, scaleFactor), + assignmentsMap); +} + PlanBuilder& PlanBuilder::values( const std::vector& values, bool parallelizable) { diff --git a/velox/exec/tests/utils/PlanBuilder.h b/velox/exec/tests/utils/PlanBuilder.h index 9174a5639fce0..0e1bf62294683 100644 --- a/velox/exec/tests/utils/PlanBuilder.h +++ b/velox/exec/tests/utils/PlanBuilder.h @@ -24,6 +24,10 @@ namespace facebook::velox::core { class IExpr; } +namespace facebook::velox::tpch { +enum class Table : uint8_t; +} + namespace facebook::velox::exec::test { /// Generates unique sequential plan node IDs starting with zero or specified @@ -145,6 +149,7 @@ class PlanBuilder { /// Add a TableScanNode using a connector-specific table handle and /// assignments. Supports any connector, not just Hive connector. + /// /// @param outputType List of column names and types to project out. Column /// names should match the keys in the 'assignments' map. The 'assignments' /// map may contain more columns then 'outputType' if some columns are only @@ -156,7 +161,19 @@ class PlanBuilder { std::string, std::shared_ptr>& assignments); + /// Add a TableScanNode to scan a TPC-H table. + /// + /// @param tpchTableHandle The handle that specifies the target TPC-H table + /// and scale factor. + /// @param columnNames The columns to be returned from that table. + /// @param scaleFactor The TPC-H scale factor. + PlanBuilder& tableScan( + tpch::Table table, + std::vector&& columnNames, + size_t scaleFactor = 1); + /// Add a ValuesNode using specified data. + /// /// @param values The data to use. /// @param parallelizable If true, ValuesNode can run multi-threaded, in which /// case it will produce duplicate data from each thread, e.g. each thread diff --git a/velox/tpch/gen/TpchGen.cpp b/velox/tpch/gen/TpchGen.cpp index 4aef0afdb1d78..a12fbd3751155 100644 --- a/velox/tpch/gen/TpchGen.cpp +++ b/velox/tpch/gen/TpchGen.cpp @@ -321,6 +321,10 @@ RowTypePtr getTableSchema(Table table) { return nullptr; // make gcc happy. } +TypePtr resolveTpchColumn(Table table, const std::string& columnName) { + return getTableSchema(table)->findChild(columnName); +} + RowVectorPtr genTpchOrders( size_t maxRows, size_t offset, diff --git a/velox/tpch/gen/TpchGen.h b/velox/tpch/gen/TpchGen.h index c9fd5578740a6..45eaef88b5acd 100644 --- a/velox/tpch/gen/TpchGen.h +++ b/velox/tpch/gen/TpchGen.h @@ -35,7 +35,7 @@ namespace facebook::velox::tpch { /// /// Data is always returned in a RowVector. -enum class Table { +enum class Table : uint8_t { TBL_PART, TBL_SUPPLIER, TBL_PARTSUPP, @@ -58,6 +58,10 @@ size_t getRowCount(Table table, size_t scaleFactor); /// Returns the schema (RowType) for a particular TPC-H table. RowTypePtr getTableSchema(Table table); +/// Returns the type of a particular table:column pair. Throws if `columnName` +/// does not exist in `table`. +TypePtr resolveTpchColumn(Table table, const std::string& columnName); + /// Returns a row vector containing at most `maxRows` rows of the "orders" /// table, starting at `offset`, and given the scale factor. The row vector /// returned has the following schema: