From 415ad044424c8791822a6cbaa28d165257b50bd0 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Sat, 16 Sep 2023 07:18:59 -0400 Subject: [PATCH] Fix Array/Map/RowVector::copyRanges for UNKNOWN source --- velox/vector/ComplexVector.cpp | 85 +++++++++-- velox/vector/ComplexVector.h | 34 +---- velox/vector/FlatVector.cpp | 14 ++ velox/vector/tests/VectorTest.cpp | 172 ++++++++++++++++++++++ velox/vector/tests/utils/VectorTestBase.h | 2 +- 5 files changed, 262 insertions(+), 45 deletions(-) diff --git a/velox/vector/ComplexVector.cpp b/velox/vector/ComplexVector.cpp index 71b641685e6a..d3be8131821e 100644 --- a/velox/vector/ComplexVector.cpp +++ b/velox/vector/ComplexVector.cpp @@ -180,6 +180,11 @@ void RowVector::copy( const BaseVector* source, const SelectivityVector& rows, const vector_size_t* toSourceRow) { + if (source->typeKind() == TypeKind::UNKNOWN) { + rows.applyToSelected([&](auto row) { setNull(row, true); }); + return; + } + // Copy non-null values. SelectivityVector nonNullRows = rows; @@ -261,6 +266,31 @@ void RowVector::copy( } } +namespace { + +// Runs quick checks to determine whether input vector has only null values. +// @return true if vector has only null values; false if vector may have +// non-null values. +bool isAllNullVector(const BaseVector& vector) { + if (vector.typeKind() == TypeKind::UNKNOWN) { + return true; + } + + if (vector.isConstantEncoding()) { + return vector.isNullAt(0); + } + + auto leafVector = vector.wrappedVector(); + if (leafVector->isConstantEncoding()) { + // A null constant does not have a value vector, so wrappedVector + // returns the constant. + VELOX_CHECK(leafVector->isNullAt(0)); + return true; + } + return false; +} +} // namespace + void RowVector::copyRanges( const BaseVector* source, const folly::Range& ranges) { @@ -268,6 +298,15 @@ void RowVector::copyRanges( return; } + if (isAllNullVector(*source)) { + for (const auto& range : ranges) { + for (auto i = 0; i < range.count; ++i) { + setNull(range.targetIndex + i, true); + } + } + return; + } + auto minTargetIndex = std::numeric_limits::max(); auto maxTargetIndex = std::numeric_limits::min(); for (auto& r : ranges) { @@ -422,23 +461,29 @@ void ArrayVectorBase::copyRangesImpl( const BaseVector* source, const folly::Range& ranges, VectorPtr* targetValues, - const BaseVector* sourceValues, - VectorPtr* targetKeys, - const BaseVector* sourceKeys) { - auto sourceValue = source->wrappedVector(); - if (sourceValue->isConstantEncoding()) { - // A null constant does not have a value vector, so wrappedVector - // returns the constant. - VELOX_CHECK(sourceValue->isNullAt(0)); - for (auto& r : ranges) { - for (auto i = 0; i < r.count; ++i) { - setNull(r.targetIndex + i, true); + VectorPtr* targetKeys) { + if (isAllNullVector(*source)) { + for (const auto& range : ranges) { + for (auto i = 0; i < range.count; ++i) { + setNull(range.targetIndex + i, true); } } return; } - VELOX_CHECK_EQ(sourceValue->encoding(), encoding()); - auto sourceArray = sourceValue->asUnchecked(); + + const BaseVector* sourceValues; + const BaseVector* sourceKeys = nullptr; + + auto leafSource = source->wrappedVector(); + VELOX_CHECK_EQ(leafSource->encoding(), encoding()); + + if (typeKind_ == TypeKind::ARRAY) { + sourceValues = leafSource->as()->elements().get(); + } else { + sourceValues = leafSource->as()->mapValues().get(); + sourceKeys = leafSource->as()->mapKeys().get(); + } + if (targetKeys) { BaseVector::ensureWritable( SelectivityVector::empty(), @@ -452,6 +497,8 @@ void ArrayVectorBase::copyRangesImpl( pool(), *targetValues); } + + auto sourceArray = leafSource->asUnchecked(); auto setNotNulls = mayHaveNulls() || source->mayHaveNulls(); auto* mutableOffsets = offsets_->asMutable(); auto* mutableSizes = sizes_->asMutable(); @@ -869,6 +916,12 @@ void ArrayVector::validate(const VectorValidateOptions& options) const { elements_->validate(options); } +void ArrayVector::copyRanges( + const BaseVector* source, + const folly::Range& ranges) { + copyRangesImpl(source, ranges, &elements_, nullptr); +} + bool MapVector::containsNullAt(vector_size_t idx) const { if (BaseVector::isNullAt(idx)) { return true; @@ -1158,5 +1211,11 @@ void MapVector::validate(const VectorValidateOptions& options) const { values_->validate(options); } +void MapVector::copyRanges( + const BaseVector* source, + const folly::Range& ranges) { + copyRangesImpl(source, ranges, &values_, &keys_); +} + } // namespace velox } // namespace facebook diff --git a/velox/vector/ComplexVector.h b/velox/vector/ComplexVector.h index e8e47e377220..4ac5a43412b7 100644 --- a/velox/vector/ComplexVector.h +++ b/velox/vector/ComplexVector.h @@ -317,9 +317,7 @@ struct ArrayVectorBase : BaseVector { const BaseVector* source, const folly::Range& ranges, VectorPtr* targetValues, - const BaseVector* sourceValues, - VectorPtr* targetKeys, - const BaseVector* sourceKeys); + VectorPtr* targetKeys); void validateArrayVectorBase( const VectorValidateOptions& options, @@ -407,20 +405,7 @@ class ArrayVector : public ArrayVectorBase { void copyRanges( const BaseVector* source, - const folly::Range& ranges) override { - const ArrayVector* sourceArray{}; - if (auto wrapped = source->wrappedVector(); - !wrapped->isConstantEncoding()) { - sourceArray = wrapped->asUnchecked(); - } - copyRangesImpl( - source, - ranges, - &elements_, - sourceArray ? sourceArray->elements_.get() : nullptr, - nullptr, - nullptr); - } + const folly::Range& ranges) override; uint64_t retainedSize() const override { return BaseVector::retainedSize() + offsets_->capacity() + @@ -547,20 +532,7 @@ class MapVector : public ArrayVectorBase { void copyRanges( const BaseVector* source, - const folly::Range& ranges) override { - const MapVector* sourceMap{}; - if (auto wrapped = source->wrappedVector(); - !wrapped->isConstantEncoding()) { - sourceMap = wrapped->asUnchecked(); - } - copyRangesImpl( - source, - ranges, - &values_, - sourceMap ? sourceMap->values_.get() : nullptr, - &keys_, - sourceMap ? sourceMap->keys_.get() : nullptr); - } + const folly::Range& ranges) override; uint64_t retainedSize() const override { return BaseVector::retainedSize() + offsets_->capacity() + diff --git a/velox/vector/FlatVector.cpp b/velox/vector/FlatVector.cpp index cbc846270a18..d3cafc598401 100644 --- a/velox/vector/FlatVector.cpp +++ b/velox/vector/FlatVector.cpp @@ -51,6 +51,11 @@ void FlatVector::copyValuesAndNulls( const BaseVector* source, const SelectivityVector& rows, const vector_size_t* toSourceRow) { + if (source->typeKind() == TypeKind::UNKNOWN) { + rows.applyToSelected([&](auto row) { setNull(row, true); }); + return; + } + source = source->loadedVector(); VELOX_CHECK( BaseVector::compatibleKind(BaseVector::typeKind(), source->typeKind())); @@ -477,6 +482,15 @@ template <> void FlatVector::copyRanges( const BaseVector* source, const folly::Range& ranges) { + if (source->typeKind() == TypeKind::UNKNOWN) { + for (const auto& range : ranges) { + for (auto i = 0; i < range.count; ++i) { + setNull(range.targetIndex + i, true); + } + } + return; + } + auto leaf = source->wrappedVector()->asUnchecked>(); if (pool_ == leaf->pool()) { // We copy referencing the storage of 'source'. diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index 109f14419733..c828ab7cbd8e 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -565,6 +565,124 @@ class VectorTest : public testing::Test, public test::VectorTestBase { testCopy(lazy, level - 1); } + void testCopyFromUnknown(const VectorPtr& vector) { + SCOPED_TRACE(vector->toString()); + + const vector_size_t size = 1'000; + + ASSERT_GE(vector->size(), size); + + // Save a copy of the 'vector' to compare results after copy. + auto vectorCopy = BaseVector::copy(*vector); + + auto unknown = makeAllNullFlatVector(size); + + // Copy every 3-rd row. + SelectivityVector rowsToCopy(size, false); + for (auto i = 0; i < size; i += 3) { + rowsToCopy.setValid(i, true); + } + rowsToCopy.updateBounds(); + + SelectivityVector rowsToKeep(size); + rowsToKeep.deselect(rowsToCopy); + + // Copy from row N to N - 10 for N >= 0 and from row N to N for N < 10; + std::vector toSourceRow(size); + for (auto i = 0; i < size; i += 3) { + if (i < 10) { + toSourceRow[i] = i; + } else { + toSourceRow[i] = i - 10; + } + } + + vector->copy(unknown.get(), rowsToCopy, toSourceRow.data()); + + rowsToCopy.applyToSelected( + [&](auto row) { EXPECT_TRUE(vector->isNullAt(row)) << "at " << row; }); + + rowsToKeep.applyToSelected([&](vector_size_t row) { + EXPECT_FALSE(vector->isNullAt(row)); + EXPECT_TRUE(vector->equalValueAt(vectorCopy.get(), row, row)) + << "at " << row << ": " << vector->toString(row) << " vs. " + << vectorCopy->toString(row); + }); + } + + void testCopySingleRangeFromUnknown(const VectorPtr& vector) { + SCOPED_TRACE(vector->toString()); + + const vector_size_t size = 1'000; + + ASSERT_GE(vector->size(), size); + + // Save a copy of the 'vector' to compare results after copy. + auto vectorCopy = BaseVector::copy(*vector); + + auto unknown = makeAllNullFlatVector(size); + + vector->copy(unknown.get(), 40, 33, 78); + + for (auto i = 0; i < size; ++i) { + if (i < 40 || i >= 40 + 78) { + EXPECT_FALSE(vector->isNullAt(i)); + EXPECT_TRUE(vector->equalValueAt(vectorCopy.get(), i, i)) + << "at " << i << ": " << vector->toString(i) << " vs. " + << vectorCopy->toString(i); + } else { + EXPECT_TRUE(vector->isNullAt(i)) << "at " << i; + } + } + } + + void testCopyRangesFromUnknown(const VectorPtr& vector) { + SCOPED_TRACE(vector->toString()); + + const vector_size_t size = 1'000; + + ASSERT_GE(vector->size(), size); + + // Save a copy of the 'vector' to compare results after copy. + auto vectorCopy = BaseVector::copy(*vector); + + auto unknown = makeAllNullFlatVector(size); + + std::vector rangesToCopy = { + {0, 0, 7}, + {10, 12, 5}, + {100, 500, 79}, + {200, 601, 1}, + {990, 950, 10}, + }; + + std::vector rangesToKeep = { + {0, 7, 5}, + {0, 17, 500 - 17}, + {0, 579, 601 - 579}, + {0, 602, 950 - 602}, + {0, 960, 40}, + }; + + vector->copyRanges(unknown.get(), rangesToCopy); + + for (const auto& range : rangesToCopy) { + for (auto i = 0; i < range.count; ++i) { + EXPECT_TRUE(vector->isNullAt(range.targetIndex + i)); + } + } + + for (const auto& range : rangesToKeep) { + for (auto i = 0; i < range.count; ++i) { + auto index = range.targetIndex + i; + EXPECT_FALSE(vector->isNullAt(index)); + EXPECT_TRUE(vector->equalValueAt(vectorCopy.get(), index, index)) + << "at " << index << ": " << vector->toString(index) << " vs. " + << vectorCopy->toString(index); + } + } + } + static void testSlice( const VectorPtr& vec, int level, @@ -1121,6 +1239,60 @@ TEST_F(VectorTest, copyToAllNullsFlatVector) { } } +TEST_F(VectorTest, copyFromUnknown) { + vector_size_t size = 1'000; + + auto test = [&](const auto& makeVectorFunc) { + auto vector = makeVectorFunc(); + testCopyFromUnknown(vector); + + vector = makeVectorFunc(); + testCopySingleRangeFromUnknown(vector); + + vector = makeVectorFunc(); + testCopyRangesFromUnknown(vector); + }; + + // Copy to BIGINT. + test([&]() { + return makeFlatVector(size, [](auto row) { return row; }); + }); + + // Copy to BOOLEAN. + test([&]() { + return makeFlatVector(size, [](auto row) { return row % 7 == 3; }); + }); + + // Copy to VARCHAR. + test([&]() { + return makeFlatVector( + size, [](auto row) { return std::string(row % 17, 'x'); }); + }); + + // Copy to ARRAY. + test([&]() { + return makeArrayVector( + size, [](auto row) { return row % 7; }, [](auto row) { return row; }); + }); + + // Copy to MAP. + test([&]() { + return makeMapVector( + size, + [](auto row) { return row % 7; }, + [](auto row) { return row; }, + [](auto row) { return row * 0.1; }); + }); + + // Copy to ROW. + test([&]() { + return makeRowVector({ + makeFlatVector(size, [](auto row) { return row; }), + makeFlatVector(size, [](auto row) { return row * 0.1; }), + }); + }); +} + TEST_F(VectorTest, wrapInConstant) { // wrap flat vector const vector_size_t size = 1'000; diff --git a/velox/vector/tests/utils/VectorTestBase.h b/velox/vector/tests/utils/VectorTestBase.h index 24d4a1992ee6..8e5d37bded91 100644 --- a/velox/vector/tests/utils/VectorTestBase.h +++ b/velox/vector/tests/utils/VectorTestBase.h @@ -153,7 +153,7 @@ class VectorTestBase { } template - FlatVectorPtr makeFlatVector( + FlatVectorPtr> makeFlatVector( vector_size_t size, std::function valueAt, std::function isNullAt = nullptr,