From 016de0a833cf8e9d5cdadef5ba4549bea0063c85 Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Thu, 17 Mar 2022 19:35:17 -0700 Subject: [PATCH] Remove min/max from SimpleVector (#1239) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/1239 Today SimpleVector may receive the min/max values as part of its metadata which means they need to be serialized/deserialized to strings. This seems inefficient, and it doesn't work for types like opaque which can't necessarily be converted to strings a straight forward way. Moreover, it doesn't look like these values are used anywhere outside of tests, so I'm opting to just remove them. If we need them, passing them as std::optional seems like a better way. I'm also changing BiasVector to take the bias directly, instead of doing the same serialization/deserialization. Can we get rid of metadata entirely? Or use a properly typed class instead of a mapping from string:string? Differential Revision: D34980442 fbshipit-source-id: ec7488d4adf27ad7d206f3c5cdb2d6aa865d3de1 --- velox/vector/BiasVector-inl.h | 9 +-- velox/vector/BiasVector.h | 5 +- velox/vector/CMakeLists.txt | 1 - velox/vector/SimpleVector.cpp | 39 ------------- velox/vector/SimpleVector.h | 77 +------------------------- velox/vector/tests/VectorMaker-inl.h | 30 +++------- velox/vector/tests/VectorMakerTest.cpp | 18 ------ velox/vector/tests/VectorTest.cpp | 9 ++- velox/vector/tests/VectorTestUtils.h | 8 --- 9 files changed, 16 insertions(+), 180 deletions(-) delete mode 100644 velox/vector/SimpleVector.cpp diff --git a/velox/vector/BiasVector-inl.h b/velox/vector/BiasVector-inl.h index 16389b87bf88..f9913d1503fa 100644 --- a/velox/vector/BiasVector-inl.h +++ b/velox/vector/BiasVector-inl.h @@ -56,6 +56,7 @@ BiasVector::BiasVector( size_t length, TypeKind valueType, BufferPtr values, + T&& bias, const folly::F14FastMap& metaData, std::optional distinctCount, std::optional nullCount, @@ -73,16 +74,12 @@ BiasVector::BiasVector( representedBytes, storageByteCount), valueType_(valueType), - values_(std::move(values)) { + values_(std::move(values)), + bias_(std::move(bias)) { VELOX_CHECK( valueType_ == TypeKind::INTEGER || valueType_ == TypeKind::SMALLINT || valueType_ == TypeKind::TINYINT, "Invalid array type for biased values"); - - auto bias = - SimpleVector::template getMetaDataValue(metaData, BIAS_VALUE); - VELOX_CHECK(bias.has_value(), "Bias value is required"); - bias_ = bias.value(); biasBuffer_ = simd::setAll256i(bias_); switch (valueType_) { diff --git a/velox/vector/BiasVector.h b/velox/vector/BiasVector.h index 3e18f1e95b8c..b7ac935c99f3 100644 --- a/velox/vector/BiasVector.h +++ b/velox/vector/BiasVector.h @@ -92,7 +92,6 @@ class BiasVector : public SimpleVector { // bias"); public: - static const std::string BIAS_VALUE; static constexpr bool can_simd = (std::is_same::value || std::is_same::value || std::is_same::value); @@ -103,6 +102,7 @@ class BiasVector : public SimpleVector { size_t length, TypeKind valueType, BufferPtr values, + T&& bias, const folly::F14FastMap& metaData = cdvi::EMPTY_METADATA, std::optional distinctCount = std::nullopt, @@ -256,9 +256,6 @@ class BiasVector : public SimpleVector { __m256i biasBuffer_; }; -template -const std::string BiasVector::BIAS_VALUE = "CDV1"; - template using BiasVectorPtr = std::shared_ptr>; diff --git a/velox/vector/CMakeLists.txt b/velox/vector/CMakeLists.txt index 51dbffeaae05..fdff5805e11b 100644 --- a/velox/vector/CMakeLists.txt +++ b/velox/vector/CMakeLists.txt @@ -21,7 +21,6 @@ add_library( LazyVector.cpp SelectivityVector.cpp SequenceVector.cpp - SimpleVector.cpp VectorEncoding.cpp VectorStream.cpp) diff --git a/velox/vector/SimpleVector.cpp b/velox/vector/SimpleVector.cpp deleted file mode 100644 index a037e3e1d7c9..000000000000 --- a/velox/vector/SimpleVector.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/vector/SimpleVector.h" -#include "velox/common/base/Exceptions.h" - -namespace facebook { -namespace velox { - -template <> -void SimpleVector::setMinMax( - const folly::F14FastMap& metaData) { - auto it = metaData.find(META_MIN); - if (it != metaData.end()) { - minString_ = it->second; - min_ = StringView(minString_); - } - it = metaData.find(META_MAX); - if (it != metaData.end()) { - maxString_ = it->second; - max_ = StringView(maxString_); - } -} - -} // namespace velox -} // namespace facebook diff --git a/velox/vector/SimpleVector.h b/velox/vector/SimpleVector.h index d515d9e05770..825f7c17d29a 100644 --- a/velox/vector/SimpleVector.h +++ b/velox/vector/SimpleVector.h @@ -37,34 +37,6 @@ namespace exec { class EvalCtx; } -/** - * Adds the given value to the metaData in a proper encoding. Will overwrite - * any existing value in the map. - */ -template -void encodeMetaData( - folly::F14FastMap& metaData, - const std::string& key, - V value) { - metaData.insert_or_assign(key, velox::to(value)); -} - -template <> -inline void encodeMetaData( - folly::F14FastMap& metaData, - const std::string& key, - StringView value) { - metaData.insert_or_assign(key, std::string(value.data(), value.size())); -} - -template <> -inline void encodeMetaData( - folly::F14FastMap& /*metaData*/, - const std::string& /*key*/, - std::shared_ptr /*value*/) { - VELOX_NYI(); -} - // This class abstracts over various Columnar Storage Formats such that Velox // can select the most appropriate one on a per field / per block basis. // The goal is to use the most appropriate type to optimize for: @@ -77,9 +49,6 @@ inline void encodeMetaData( template class SimpleVector : public BaseVector { public: - constexpr static auto META_MIN = folly::makeFixedString("CTV1"); - constexpr static auto META_MAX = folly::makeFixedString("CTV2"); - SimpleVector( velox::memory::MemoryPool* pool, std::shared_ptr type, @@ -101,9 +70,7 @@ class SimpleVector : public BaseVector { representedByteCount, storageByteCount), isSorted_(isSorted), - elementSize_(sizeof(T)) { - setMinMax(metaData); - } + elementSize_(sizeof(T)) {} // Constructs SimpleVector inferring the type from T. SimpleVector( @@ -130,17 +97,6 @@ class SimpleVector : public BaseVector { virtual ~SimpleVector() override {} - folly::F14FastMap genMetaData() const { - folly::F14FastMap metaData; - if (min_.hasValue()) { - encodeMetaData(metaData, META_MIN, min_.value()); - } - if (max_.hasValue()) { - encodeMetaData(metaData, META_MAX, max_.value()); - } - return metaData; - } - // Concrete Vector types need to implement this themselves. // This method does not do bounds checking. When the value is null the return // value is technically undefined (currently implemented as default of T) @@ -208,14 +164,6 @@ class SimpleVector : public BaseVector { return isSorted_; } - const std::optional& getMin() const { - return min_; - } - - const std::optional& getMax() const { - return max_; - } - void resize(vector_size_t size, bool setNotNull = true) override { VELOX_CHECK(false, "Can only resize flat vectors."); } @@ -380,18 +328,7 @@ class SimpleVector : public BaseVector { sizeof(T)); } - std::optional min_; - std::optional max_; - // Holds the data for StringView min/max. - std::string minString_; - std::string maxString_; - private: - void setMinMax(const folly::F14FastMap& metaData) { - min_ = getMetaDataValue(metaData, META_MIN); - max_ = getMetaDataValue(metaData, META_MAX); - } - int comparePrimitiveAsc(const T& left, const T& right) const { if constexpr (std::is_floating_point::value) { bool isLeftNan = std::isnan(left); @@ -422,18 +359,6 @@ class SimpleVector : public BaseVector { SelectivityVector asciiSetRows_; }; // namespace velox -template <> -void SimpleVector::setMinMax( - const folly::F14FastMap& metaData); - -template <> -inline void SimpleVector::setMinMax( - const folly::F14FastMap& /*metaData*/) {} - -template <> -inline void SimpleVector>::setMinMax( - const folly::F14FastMap& /*metaData*/) {} - template <> inline bool SimpleVector::equalValueAt( const BaseVector* other, diff --git a/velox/vector/tests/VectorMaker-inl.h b/velox/vector/tests/VectorMaker-inl.h index 892db0530dab..a7292cdf669f 100644 --- a/velox/vector/tests/VectorMaker-inl.h +++ b/velox/vector/tests/VectorMaker-inl.h @@ -18,20 +18,6 @@ #include "velox/vector/tests/VectorMakerStats.h" namespace facebook::velox::test { - -template -folly::F14FastMap getMetadata( - const VectorMakerStats& stats) { - folly::F14FastMap metadata; - if (stats.min.has_value()) { - encodeMetaData(metadata, SimpleVector::META_MIN, stats.min.value()); - } - if (stats.max.has_value()) { - encodeMetaData(metadata, SimpleVector::META_MAX, stats.max.value()); - } - return metadata; -} - template BufferPtr buildBiasedBuffer( const std::vector>& values, @@ -75,9 +61,6 @@ BiasVectorPtr> VectorMaker::biasVector( // Check BiasVector.h for explanation of this calculation. TEvalType bias = min + static_cast(std::ceil(delta / 2.0)); - auto metadata = getMetadata(stats); - encodeMetaData(metadata, BiasVector::BIAS_VALUE, bias); - BufferPtr buffer; TypeKind valueType; @@ -98,7 +81,8 @@ BiasVectorPtr> VectorMaker::biasVector( data.size(), valueType, buffer, - metadata, + std::move(bias), + cdvi::EMPTY_METADATA, stats.distinctCount(), stats.nullCount, stats.isSorted); @@ -152,7 +136,7 @@ SequenceVectorPtr> VectorMaker::sequenceVector( data.size(), flatVectorNullable(sequenceVals), copyToBuffer(sequenceLengths, pool_), - getMetadata(stats), + cdvi::EMPTY_METADATA, stats.distinctCount(), stats.nullCount, stats.isSorted); @@ -177,7 +161,7 @@ ConstantVectorPtr> VectorMaker::constantVector( data.size(), nullCount > 0, (nullCount > 0) ? TEvalType() : folly::copy(*data.front()), - getMetadata(stats)); + cdvi::EMPTY_METADATA); } template @@ -216,7 +200,7 @@ DictionaryVectorPtr> VectorMaker::dictionaryVector( std::move(values), TypeKind::INTEGER, std::move(indices), - getMetadata(stats), + cdvi::EMPTY_METADATA, indexMap.size(), nullCount, stats.isSorted); @@ -244,7 +228,7 @@ FlatVectorPtr> VectorMaker::flatVectorNullable( data.size(), std::move(dataBuffer), std::vector(), - getMetadata(stats), + cdvi::EMPTY_METADATA, stats.distinctCount(), stats.nullCount, stats.isSorted); @@ -275,7 +259,7 @@ FlatVectorPtr> VectorMaker::flatVector( data.size(), std::move(dataBuffer), std::vector(), - getMetadata(stats), + cdvi::EMPTY_METADATA, stats.distinctCount(), stats.nullCount, stats.isSorted); diff --git a/velox/vector/tests/VectorMakerTest.cpp b/velox/vector/tests/VectorMakerTest.cpp index 73fbc090c966..35668d6f605f 100644 --- a/velox/vector/tests/VectorMakerTest.cpp +++ b/velox/vector/tests/VectorMakerTest.cpp @@ -39,8 +39,6 @@ TEST_F(VectorMakerTest, flatVector) { EXPECT_EQ(0, flatVector->getNullCount().value()); EXPECT_FALSE(flatVector->isSorted().value()); EXPECT_EQ(9, flatVector->getDistinctValueCount().value()); - EXPECT_EQ(-123456, flatVector->getMin().value()); - EXPECT_EQ(1024, flatVector->getMax().value()); for (vector_size_t i = 0; i < data.size(); i++) { EXPECT_EQ(data[i], flatVector->valueAt(i)); @@ -58,8 +56,6 @@ TEST_F(VectorMakerTest, nullableFlatVector) { EXPECT_EQ(2, flatVector->getNullCount().value()); EXPECT_FALSE(flatVector->isSorted().value()); EXPECT_EQ(8, flatVector->getDistinctValueCount().value()); - EXPECT_EQ(-123456, flatVector->getMin().value()); - EXPECT_EQ(1024, flatVector->getMax().value()); for (vector_size_t i = 0; i < data.size(); i++) { if (data[i] == std::nullopt) { @@ -87,8 +83,6 @@ TEST_F(VectorMakerTest, flatVectorString) { EXPECT_EQ(0, flatVector->getNullCount().value()); EXPECT_FALSE(flatVector->isSorted().value()); EXPECT_EQ(5, flatVector->getDistinctValueCount().value()); - EXPECT_EQ(""_sv, flatVector->getMin().value()); - EXPECT_EQ(StringView("world"), flatVector->getMax().value()); for (vector_size_t i = 0; i < data.size(); i++) { EXPECT_EQ(data[i], std::string(flatVector->valueAt(i))); @@ -161,8 +155,6 @@ TEST_F(VectorMakerTest, nullableFlatVectorString) { EXPECT_EQ(1, flatVector->getNullCount().value()); EXPECT_FALSE(flatVector->isSorted().value()); EXPECT_EQ(4, flatVector->getDistinctValueCount().value()); - EXPECT_EQ(""_sv, flatVector->getMin().value()); - EXPECT_EQ("world"_sv, flatVector->getMax().value()); for (vector_size_t i = 0; i < data.size(); i++) { if (data[i] == std::nullopt) { @@ -190,8 +182,6 @@ TEST_F(VectorMakerTest, nullableFlatVectorBool) { EXPECT_EQ(1, flatVector->getNullCount().value()); EXPECT_FALSE(flatVector->isSorted().value()); EXPECT_EQ(2, flatVector->getDistinctValueCount().value()); - EXPECT_EQ(false, flatVector->getMin().value()); - EXPECT_EQ(true, flatVector->getMax().value()); for (vector_size_t i = 0; i < data.size(); i++) { if (data[i] == std::nullopt) { @@ -600,8 +590,6 @@ TEST_F(VectorMakerTest, biasVector) { EXPECT_TRUE(biasVector->mayHaveNulls()); EXPECT_EQ(1, biasVector->getNullCount().value()); EXPECT_FALSE(biasVector->isSorted().value()); - EXPECT_EQ(10, biasVector->getMin().value()); - EXPECT_EQ(15, biasVector->getMax().value()); for (vector_size_t i = 0; i < data.size(); i++) { if (data[i] == std::nullopt) { @@ -624,8 +612,6 @@ TEST_F(VectorMakerTest, sequenceVector) { EXPECT_EQ(3, sequenceVector->getNullCount().value()); EXPECT_FALSE(sequenceVector->isSorted().value()); EXPECT_EQ(4, sequenceVector->getDistinctValueCount().value()); - EXPECT_EQ(10, sequenceVector->getMin().value()); - EXPECT_EQ(15, sequenceVector->getMax().value()); for (vector_size_t i = 0; i < data.size(); i++) { if (data[i] == std::nullopt) { @@ -657,8 +643,6 @@ TEST_F(VectorMakerTest, sequenceVectorString) { EXPECT_EQ(3, sequenceVector->getNullCount().value()); EXPECT_FALSE(sequenceVector->isSorted().value()); EXPECT_EQ(3, sequenceVector->getDistinctValueCount().value()); - EXPECT_EQ("a"_sv, sequenceVector->getMin().value()); - EXPECT_EQ("c"_sv, sequenceVector->getMax().value()); for (vector_size_t i = 0; i < data.size(); i++) { if (data[i] == std::nullopt) { @@ -741,8 +725,6 @@ TEST_F(VectorMakerTest, dictionaryVector) { EXPECT_EQ(2, dictionaryVector->getNullCount().value()); EXPECT_FALSE(dictionaryVector->isSorted().value()); EXPECT_EQ(3, dictionaryVector->getDistinctValueCount().value()); - EXPECT_EQ(10, dictionaryVector->getMin().value()); - EXPECT_EQ(99, dictionaryVector->getMax().value()); for (vector_size_t i = 0; i < data.size(); i++) { if (data[i] == std::nullopt) { diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index 32e33d9cc244..d7ae78c03104 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -137,24 +137,23 @@ class VectorTest : public testing::Test { } auto rawValues = values->asMutable(); int32_t numNulls = 0; - constexpr int32_t kBias = 100; + int32_t bias = 100; for (int32_t i = 0; i < size; ++i) { if (withNulls && i % 3 == 0) { ++numNulls; bits::setNull(rawNulls, i); } else { - rawValues[i] = testValue(i, buffer) - kBias; + rawValues[i] = testValue(i, buffer) - bias; } } - folly::F14FastMap metadata; - metadata[BiasVector::BIAS_VALUE] = folly::to(kBias); return std::make_shared>( pool_.get(), nulls, size, BiasKind, std::move(values), - std::move(metadata), + std::move(bias), + cdvi::EMPTY_METADATA, std::nullopt, numNulls, false, diff --git a/velox/vector/tests/VectorTestUtils.h b/velox/vector/tests/VectorTestUtils.h index 6d9d48ca6593..d68e64d7e375 100644 --- a/velox/vector/tests/VectorTestUtils.h +++ b/velox/vector/tests/VectorTestUtils.h @@ -235,14 +235,6 @@ void assertExtraMetadata( const VectorMakerStats& expectedStats, const SimpleVectorPtr& outVector) { EXPECT_EQ(expectedStats.isSorted, outVector->isSorted().value()); - EXPECT_EQ(expectedStats.min.has_value(), outVector->getMin().has_value()); - EXPECT_EQ(expectedStats.max.has_value(), outVector->getMax().has_value()); - if (expectedStats.min.has_value() && outVector->getMin().has_value()) { - EXPECT_EQ(expectedStats.min.value(), outVector->getMin().value()); - } - if (expectedStats.max.has_value() && outVector->getMax().has_value()) { - EXPECT_EQ(expectedStats.max.value(), outVector->getMax().value()); - } } template