diff --git a/velox/dwio/common/tests/utils/DataSetBuilder.cpp b/velox/dwio/common/tests/utils/DataSetBuilder.cpp index f9d532c84eab7..1926d20f473a2 100644 --- a/velox/dwio/common/tests/utils/DataSetBuilder.cpp +++ b/velox/dwio/common/tests/utils/DataSetBuilder.cpp @@ -37,16 +37,20 @@ RowTypePtr DataSetBuilder::makeRowType( DataSetBuilder& DataSetBuilder::makeDataset( RowTypePtr rowType, const size_t batchCount, - const size_t numRows) { + const size_t numRows, + const bool withRecursiveNulls) { if (batches_) { batches_->clear(); } else { batches_ = std::make_unique>(); } + auto isNullAt = withRecursiveNulls ? nullptr : [](vector_size_t /*index*/) { + return false; + }; for (size_t i = 0; i < batchCount; ++i) { batches_->push_back(std::static_pointer_cast( - BatchMaker::createBatch(rowType, numRows, pool_, nullptr, i))); + BatchMaker::createBatch(rowType, numRows, pool_, isNullAt, i))); } return *this; diff --git a/velox/dwio/common/tests/utils/DataSetBuilder.h b/velox/dwio/common/tests/utils/DataSetBuilder.h index d43fe28518a8d..4893c28336f62 100644 --- a/velox/dwio/common/tests/utils/DataSetBuilder.h +++ b/velox/dwio/common/tests/utils/DataSetBuilder.h @@ -43,7 +43,8 @@ class DataSetBuilder { DataSetBuilder& makeDataset( RowTypePtr rowType, const size_t batchCount, - const size_t numRows); + const size_t numRows, + const bool withRecursiveNulls = true); // Adds high values to 'batches_' so that these values occur only in some row // groups. Tests skipping row groups based on row group stats. diff --git a/velox/dwio/common/tests/utils/E2EFilterTestBase.cpp b/velox/dwio/common/tests/utils/E2EFilterTestBase.cpp index 755ff71573909..287d245c84085 100644 --- a/velox/dwio/common/tests/utils/E2EFilterTestBase.cpp +++ b/velox/dwio/common/tests/utils/E2EFilterTestBase.cpp @@ -46,12 +46,14 @@ using velox::common::Subfield; std::vector E2EFilterTestBase::makeDataset( std::function customize, - bool forRowGroupSkip) { + bool forRowGroupSkip, + bool withRecursiveNulls) { if (!dataSetBuilder_) { dataSetBuilder_ = std::make_unique(*leafPool_, 0); } - dataSetBuilder_->makeDataset(rowType_, batchCount_, batchSize_); + dataSetBuilder_->makeDataset( + rowType_, batchCount_, batchSize_, withRecursiveNulls); if (forRowGroupSkip) { dataSetBuilder_->withRowGroupSpecificData(kRowsInGroup); @@ -408,17 +410,18 @@ void E2EFilterTestBase::testScenario( std::function customize, bool wrapInStruct, const std::vector& filterable, - int32_t numCombinations) { + int32_t numCombinations, + bool withRecursiveNulls) { rowType_ = DataSetBuilder::makeRowType(columns, wrapInStruct); filterGenerator_ = std::make_unique(rowType_, seed_); - auto batches = makeDataset(customize, false); + auto batches = makeDataset(customize, false, withRecursiveNulls); writeToMemory(rowType_, batches, false); testNoRowGroupSkip(batches, filterable, numCombinations); testPruningWithFilter(batches, filterable); if (testRowGroupSkip_) { - batches = makeDataset(customize, true); + batches = makeDataset(customize, true, withRecursiveNulls); writeToMemory(rowType_, batches, true); testRowGroupSkip(batches, filterable); } diff --git a/velox/dwio/common/tests/utils/E2EFilterTestBase.h b/velox/dwio/common/tests/utils/E2EFilterTestBase.h index f26ac8beef1b8..f0e9d1daa0c9e 100644 --- a/velox/dwio/common/tests/utils/E2EFilterTestBase.h +++ b/velox/dwio/common/tests/utils/E2EFilterTestBase.h @@ -105,7 +105,8 @@ class E2EFilterTestBase : public testing::Test { std::vector makeDataset( std::function customize, - bool forRowGroupSkip); + bool forRowGroupSkip, + bool withRecursiveNulls); void makeAllNulls(const std::string& fieldName); @@ -297,7 +298,8 @@ class E2EFilterTestBase : public testing::Test { std::function customize, bool wrapInStruct, const std::vector& filterable, - int32_t numCombinations); + int32_t numCombinations, + bool withRecursiveNulls = true); private: void testMetadataFilterImpl(