From 8dae417e8969702063505d2e386bcb48a031b3c5 Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Wed, 13 Apr 2022 11:51:33 -0700 Subject: [PATCH] Replace SimpleVector metadata with strongly typed SimpleVectorStats (#1347) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/1347 SimpleVector uses a string-ly typed map to store metadata. This seems to be used primarily for storing min/max stats as well as the bias for BiasVectors. This is inefficient because we end up de/serializing back and forth from strings, and doesn't work for all types (e.g. opaque types). This causes issues in VectorMaker as it tries to initialize stats when creating vectors, so we can't create vectors of Opaque types. Since SimpleVector is templated on the underlying type, I add a SimpleVectorStats object, also templated on this type, and pass this to the vector's constructor instead of the map of strings. This way min/max don't need to be serialized/deserialized, and it works for any type. Reviewed By: laithsakka Differential Revision: D35363043 fbshipit-source-id: 853c979184aef8e420154f73ba5738c98cfc5334 --- .../reader/SelectiveColumnReaderInternal.h | 2 +- velox/expression/EvalCtx.cpp | 2 +- velox/type/Type.h | 17 ++-- velox/vector/BaseVector.cpp | 16 ++-- velox/vector/BaseVector.h | 4 - velox/vector/BiasVector-inl.h | 16 ++-- velox/vector/BiasVector.h | 8 +- velox/vector/CMakeLists.txt | 1 - velox/vector/ConstantVector-inl.h | 2 +- velox/vector/ConstantVector.cpp | 6 ++ velox/vector/ConstantVector.h | 18 ++-- velox/vector/DictionaryVector-inl.h | 8 +- velox/vector/DictionaryVector.h | 14 ++- velox/vector/FlatVector-inl.h | 2 +- velox/vector/FlatVector.h | 10 +-- velox/vector/SequenceVector-inl.h | 8 +- velox/vector/SequenceVector.h | 3 +- velox/vector/SimpleVector.cpp | 39 --------- velox/vector/SimpleVector.h | 87 ++++--------------- velox/vector/arrow/Bridge.cpp | 4 +- velox/vector/tests/VectorMaker-inl.h | 31 ++----- velox/vector/tests/VectorMakerStats.h | 36 +++++--- velox/vector/tests/VectorTest.cpp | 6 +- velox/vector/tests/VectorTestBase.h | 2 +- 24 files changed, 117 insertions(+), 225 deletions(-) delete mode 100644 velox/vector/SimpleVector.cpp diff --git a/velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h b/velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h index a8473e976bef..31583c447e42 100644 --- a/velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h +++ b/velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h @@ -129,7 +129,7 @@ void SelectiveColumnReader::getFlatValues( true, type, T(), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, sizeof(TVector) * rows.size()); return; } diff --git a/velox/expression/EvalCtx.cpp b/velox/expression/EvalCtx.cpp index 2e42fe997ee7..ec58e4d15836 100644 --- a/velox/expression/EvalCtx.cpp +++ b/velox/expression/EvalCtx.cpp @@ -141,7 +141,7 @@ void EvalCtx::addError( AlignedBuffer::allocate( size, pool(), ErrorVector::value_type()), std::vector(0), - cdvi::EMPTY_METADATA, + SimpleVectorStats>{}, 1 /*distinctValueCount*/, size /*nullCount*/, false /*isSorted*/, diff --git a/velox/type/Type.h b/velox/type/Type.h index 2d8f5e365762..8c105f4a63cf 100644 --- a/velox/type/Type.h +++ b/velox/type/Type.h @@ -1564,40 +1564,39 @@ static inline int32_t sizeOfTypeKind(TypeKind kind) { return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(sizeOfTypeKindHelper, kind); } -// Helper for using folly::to with StringView. template -static inline T to(U value) { +static inline T to(const U& value) { return folly::to(value); } template <> -inline Timestamp to(std::string value) { +inline Timestamp to(const std::string& value) { return Timestamp(0, 0); } template <> -inline UnknownValue to(std::string /* value */) { +inline UnknownValue to(const std::string& /* value */) { return UnknownValue(); } template <> -inline std::string to(Timestamp value) { +inline std::string to(const Timestamp& value) { return value.toString(); } template <> -inline std::string to(velox::StringView value) { +inline std::string to(const velox::StringView& value) { return std::string(value.data(), value.size()); } template <> -inline std::string to(ComplexType value) { +inline std::string to(const ComplexType& value) { return std::string("ComplexType"); } template <> -inline ComplexType to(std::string value) { - return ComplexType(); +inline velox::StringView to(const std::string& value) { + return velox::StringView(value); } namespace exec { diff --git a/velox/vector/BaseVector.cpp b/velox/vector/BaseVector.cpp index f468fc1bd11b..8614af4b305d 100644 --- a/velox/vector/BaseVector.cpp +++ b/velox/vector/BaseVector.cpp @@ -111,7 +111,7 @@ static VectorPtr addDictionary( std::move(vector), TypeKind::INTEGER, std::move(indices), - cdvi::EMPTY_METADATA, + SimpleVectorStats::WrapperType>{}, indicesBuffer->size() / sizeof(vector_size_t) /*distinctValueCount*/, base->getNullCount(), false /*isSorted*/, @@ -144,7 +144,7 @@ addSequence(BufferPtr lengths, vector_size_t size, VectorPtr vector) { size, std::move(vector), std::move(lengths), - cdvi::EMPTY_METADATA, + SimpleVectorStats::WrapperType>{}, std::nullopt /*distinctCount*/, std::nullopt, false /*sorted*/, @@ -177,7 +177,7 @@ addConstant(vector_size_t size, vector_size_t index, VectorPtr vector) { auto singleNull = BaseVector::create(vector->type(), 1, pool); singleNull->setNull(0, true); return std::make_shared>( - pool, size, 0, singleNull, cdvi::EMPTY_METADATA); + pool, size, 0, singleNull, SimpleVectorStats{}); } else { return std::make_shared>( pool, size, true, vector->type(), T()); @@ -207,7 +207,7 @@ addConstant(vector_size_t size, vector_size_t index, VectorPtr vector) { } return std::make_shared>( - pool, size, index, std::move(vector), cdvi::EMPTY_METADATA); + pool, size, index, std::move(vector), SimpleVectorStats{}); } // static @@ -234,7 +234,7 @@ static VectorPtr createEmpty( size, std::move(values), std::vector(), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, 0 /*distinctValueCount*/, 0 /*nullCount*/, false /*isSorted*/, @@ -332,7 +332,7 @@ VectorPtr BaseVector::create( size, BufferPtr(nullptr), std::vector(), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, 1 /*distinctValueCount*/, size /*nullCount*/, true /*isSorted*/, @@ -539,7 +539,7 @@ VectorPtr newConstant( value.isNull(), Type::create(), std::move(copy), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, sizeof(T) /*representedByteCount*/); } @@ -557,7 +557,7 @@ VectorPtr newConstant( value.isNull(), capsule.type, std::shared_ptr(capsule.obj), - cdvi::EMPTY_METADATA, + SimpleVectorStats>{}, sizeof(std::shared_ptr) /*representedByteCount*/); } diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index fff18c324ef8..07ab34b37164 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -43,10 +43,6 @@ namespace facebook { namespace velox { -namespace cdvi { -const folly::F14FastMap EMPTY_METADATA; -} // namespace cdvi - template class SimpleVector; diff --git a/velox/vector/BiasVector-inl.h b/velox/vector/BiasVector-inl.h index 16389b87bf88..eb19005f5d3e 100644 --- a/velox/vector/BiasVector-inl.h +++ b/velox/vector/BiasVector-inl.h @@ -46,8 +46,6 @@ static inline __m256i simdAdd(const __m256i& left, const __m256i& right) { * 2 buffers. * * Buffer order: [nullData, valueData] - * - * The bias value is stored in the metaData block via the BIAS_VALUE key. */ template BiasVector::BiasVector( @@ -56,7 +54,8 @@ BiasVector::BiasVector( size_t length, TypeKind valueType, BufferPtr values, - const folly::F14FastMap& metaData, + T bias, + const SimpleVectorStats& stats, std::optional distinctCount, std::optional nullCount, std::optional sorted, @@ -66,23 +65,20 @@ BiasVector::BiasVector( pool, nulls, length, - metaData, + stats, distinctCount, nullCount, sorted, representedBytes, storageByteCount), valueType_(valueType), - values_(std::move(values)) { + values_(std::move(values)), + bias_(bias) { VELOX_CHECK( valueType_ == TypeKind::INTEGER || valueType_ == TypeKind::SMALLINT || valueType_ == TypeKind::TINYINT, "Invalid array type for biased values"); - auto bias = - SimpleVector::template getMetaDataValue(metaData, BIAS_VALUE); - VELOX_CHECK(bias.has_value(), "Bias value is required"); - bias_ = bias.value(); biasBuffer_ = simd::setAll256i(bias_); switch (valueType_) { @@ -122,7 +118,7 @@ std::unique_ptr> BiasVector::hashAll() const { BaseVector::length_, hashes, std::vector(0) /*stringBuffers*/, - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, std::nullopt /*distinctValueCount*/, 0 /* nullCount */, false /*isSorted*/, diff --git a/velox/vector/BiasVector.h b/velox/vector/BiasVector.h index 3e18f1e95b8c..b43efe130e04 100644 --- a/velox/vector/BiasVector.h +++ b/velox/vector/BiasVector.h @@ -92,7 +92,6 @@ class BiasVector : public SimpleVector { // bias"); public: - static const std::string BIAS_VALUE; static constexpr bool can_simd = (std::is_same::value || std::is_same::value || std::is_same::value); @@ -103,8 +102,8 @@ class BiasVector : public SimpleVector { size_t length, TypeKind valueType, BufferPtr values, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + T bias, + const SimpleVectorStats& stats = {}, std::optional distinctCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional sorted = std::nullopt, @@ -256,9 +255,6 @@ class BiasVector : public SimpleVector { __m256i biasBuffer_; }; -template -const std::string BiasVector::BIAS_VALUE = "CDV1"; - template using BiasVectorPtr = std::shared_ptr>; diff --git a/velox/vector/CMakeLists.txt b/velox/vector/CMakeLists.txt index ecd845fe7f0d..5355b603a0b5 100644 --- a/velox/vector/CMakeLists.txt +++ b/velox/vector/CMakeLists.txt @@ -21,7 +21,6 @@ add_library( LazyVector.cpp SelectivityVector.cpp SequenceVector.cpp - SimpleVector.cpp VectorEncoding.cpp VectorStream.cpp) diff --git a/velox/vector/ConstantVector-inl.h b/velox/vector/ConstantVector-inl.h index 610f215e29ac..5f960e5ec77a 100644 --- a/velox/vector/ConstantVector-inl.h +++ b/velox/vector/ConstantVector-inl.h @@ -24,7 +24,7 @@ std::unique_ptr> ConstantVector::hashAll() const { BaseVector::length_, false /* isNull */, this->hashValueAt(0), - folly::F14FastMap(), + SimpleVectorStats{}, /* stats */ sizeof(uint64_t) * BaseVector::length_ /* representedBytes */); } diff --git a/velox/vector/ConstantVector.cpp b/velox/vector/ConstantVector.cpp index bf10c36525a8..aeb38b893fe0 100644 --- a/velox/vector/ConstantVector.cpp +++ b/velox/vector/ConstantVector.cpp @@ -36,5 +36,11 @@ void ConstantVector>::setValue( VELOX_NYI(); } +template <> +void ConstantVector::setValue(const std::string& /*string*/) { + VELOX_UNSUPPORTED( + "ConstantVectors of ComplexType cannot be initialized from string values."); +} + } // namespace velox } // namespace facebook diff --git a/velox/vector/ConstantVector.h b/velox/vector/ConstantVector.h index 69966805471c..581c0ecb203c 100644 --- a/velox/vector/ConstantVector.h +++ b/velox/vector/ConstantVector.h @@ -45,8 +45,7 @@ class ConstantVector final : public SimpleVector { size_t length, bool isNull, T&& val, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional representedBytes = std::nullopt, std::optional storageByteCount = std::nullopt) : ConstantVector( @@ -55,7 +54,7 @@ class ConstantVector final : public SimpleVector { isNull, CppToType::create(), std::move(val), - metaData, + stats, representedBytes, storageByteCount) {} @@ -65,8 +64,7 @@ class ConstantVector final : public SimpleVector { bool isNull, std::shared_ptr type, T&& val, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional representedBytes = std::nullopt, std::optional storageByteCount = std::nullopt) : SimpleVector( @@ -74,7 +72,7 @@ class ConstantVector final : public SimpleVector { type, BufferPtr(nullptr), length, - metaData, + stats, isNull ? 0 : 1 /* distinctValueCount */, isNull ? length : 0 /* nullCount */, true /*isSorted*/, @@ -114,14 +112,13 @@ class ConstantVector final : public SimpleVector { vector_size_t length, vector_size_t index, VectorPtr base, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA) + const SimpleVectorStats& stats = {}) : SimpleVector( pool, base->type(), BufferPtr(nullptr), length, - metaData, + stats, std::nullopt, std::nullopt, true /*isSorted*/, @@ -413,6 +410,9 @@ void ConstantVector::setValue(const std::string& string); template <> void ConstantVector>::setValue(const std::string& string); +template <> +void ConstantVector::setValue(const std::string& string); + template using ConstantVectorPtr = std::shared_ptr>; diff --git a/velox/vector/DictionaryVector-inl.h b/velox/vector/DictionaryVector-inl.h index 1aa300b24050..b64177519471 100644 --- a/velox/vector/DictionaryVector-inl.h +++ b/velox/vector/DictionaryVector-inl.h @@ -55,7 +55,7 @@ DictionaryVector::DictionaryVector( std::shared_ptr dictionaryValues, TypeKind indexType, BufferPtr dictionaryIndices, - const folly::F14FastMap& metaData, + const SimpleVectorStats& stats, std::optional distinctValueCount, std::optional nullCount, std::optional isSorted, @@ -66,13 +66,13 @@ DictionaryVector::DictionaryVector( dictionaryValues->type(), nulls, length, - metaData, + stats, distinctValueCount, nullCount, isSorted, representedBytes, storageByteCount), - dictionaryMetaData_(metaData) { + dictionaryStats_(stats) { VELOX_CHECK(dictionaryValues != nullptr, "dictionaryValues must not be null"); VELOX_CHECK( dictionaryIndices != nullptr, "dictionaryIndices must not be null"); @@ -139,7 +139,7 @@ std::unique_ptr> DictionaryVector::hashAll() const { BaseVector::length_, hashes, std::vector(0) /* stringBuffers */, - folly::F14FastMap(), + SimpleVectorStats{}, BaseVector::distinctValueCount_.value() + (BaseVector::nullCount_.value_or(0) > 0 ? 1 : 0), 0 /* nullCount */, diff --git a/velox/vector/DictionaryVector.h b/velox/vector/DictionaryVector.h index 378539fba4a3..26603079805e 100644 --- a/velox/vector/DictionaryVector.h +++ b/velox/vector/DictionaryVector.h @@ -55,8 +55,7 @@ class DictionaryVector : public SimpleVector { VectorPtr dictionaryValues, TypeKind indexTypeKind, BufferPtr dictionaryIndexArray, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional distinctValueCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional isSorted = std::nullopt, @@ -114,13 +113,12 @@ class DictionaryVector : public SimpleVector { } /** - * @return metadata for the internal dictionary value vector. They + * @return stats for the internal dictionary value vector. They * hold the min and max value in the dictionary. */ // TODO (T61713241): Remove this later. - inline const folly::F14FastMap& - getDictionaryMetaData() const { - return dictionaryMetaData_; + inline const SimpleVectorStats& getDictionaryStats() const { + return dictionaryStats_; } inline const BufferPtr& indices() const { @@ -238,8 +236,8 @@ class DictionaryVector : public SimpleVector { void setInternalState(); - // metadata over the contained vector data - folly::F14FastMap dictionaryMetaData_; + // stats over the contained vector data + SimpleVectorStats dictionaryStats_; // the dictionary indices of the vector can be variable types depending on the // size of the dictionary - kept as original and typed diff --git a/velox/vector/FlatVector-inl.h b/velox/vector/FlatVector-inl.h index b0049fd1f0eb..33fc906d0daf 100644 --- a/velox/vector/FlatVector-inl.h +++ b/velox/vector/FlatVector-inl.h @@ -111,7 +111,7 @@ std::unique_ptr> FlatVector::hashAll() const { BaseVector::length_, std::move(hashBuffer), std::vector() /*stringBuffers*/, - folly::F14FastMap(), + SimpleVectorStats{}, /* stats */ std::nullopt /*distinctValueCount*/, 0 /*nullCount*/, false /*sorted*/, diff --git a/velox/vector/FlatVector.h b/velox/vector/FlatVector.h index 395a041a5b00..1d06b04fb6e7 100644 --- a/velox/vector/FlatVector.h +++ b/velox/vector/FlatVector.h @@ -57,8 +57,7 @@ class FlatVector final : public SimpleVector { size_t length, BufferPtr values, std::vector&& stringBuffers, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional distinctValueCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional isSorted = std::nullopt, @@ -71,7 +70,7 @@ class FlatVector final : public SimpleVector { length, values, std::move(stringBuffers), - metaData, + stats, distinctValueCount, nullCount, isSorted, @@ -85,8 +84,7 @@ class FlatVector final : public SimpleVector { size_t length, BufferPtr values, std::vector&& stringBuffers, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional distinctValueCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional isSorted = std::nullopt, @@ -97,7 +95,7 @@ class FlatVector final : public SimpleVector { type, std::move(nulls), length, - metaData, + stats, distinctValueCount, nullCount, isSorted, diff --git a/velox/vector/SequenceVector-inl.h b/velox/vector/SequenceVector-inl.h index be22c6e780d3..39ef8399a9ad 100644 --- a/velox/vector/SequenceVector-inl.h +++ b/velox/vector/SequenceVector-inl.h @@ -27,7 +27,7 @@ SequenceVector::SequenceVector( size_t length, std::shared_ptr sequenceValues, BufferPtr sequenceLengths, - const folly::F14FastMap& metaData, + const SimpleVectorStats& stats, std::optional distinctCount, std::optional nullCount, std::optional isSorted, @@ -38,7 +38,7 @@ SequenceVector::SequenceVector( sequenceValues->type(), BufferPtr(nullptr), length, - metaData, + stats, distinctCount, nullCount, isSorted, @@ -102,7 +102,7 @@ std::unique_ptr> SequenceVector::hashAll() const { sequenceCount, hashes, std::vector(0) /*stringBuffers*/, - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, /*stats*/ std::nullopt /*distinctValueCount*/, 0 /* nullCount */, false /* sorted */, @@ -113,7 +113,7 @@ std::unique_ptr> SequenceVector::hashAll() const { BaseVector::length_, std::move(hashValues), sequenceLengths_, - folly::F14FastMap(), + SimpleVectorStats{}, /*stats*/ std::nullopt /*distinctCount*/, 0 /* nullCount */, false /* sorted */, diff --git a/velox/vector/SequenceVector.h b/velox/vector/SequenceVector.h index 02bc2ba37868..83f60f031e9d 100644 --- a/velox/vector/SequenceVector.h +++ b/velox/vector/SequenceVector.h @@ -44,8 +44,7 @@ class SequenceVector : public SimpleVector { size_t length, std::shared_ptr sequenceValues, BufferPtr sequenceLengths, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional distinctCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional sorted = std::nullopt, diff --git a/velox/vector/SimpleVector.cpp b/velox/vector/SimpleVector.cpp deleted file mode 100644 index a037e3e1d7c9..000000000000 --- a/velox/vector/SimpleVector.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/vector/SimpleVector.h" -#include "velox/common/base/Exceptions.h" - -namespace facebook { -namespace velox { - -template <> -void SimpleVector::setMinMax( - const folly::F14FastMap& metaData) { - auto it = metaData.find(META_MIN); - if (it != metaData.end()) { - minString_ = it->second; - min_ = StringView(minString_); - } - it = metaData.find(META_MAX); - if (it != metaData.end()) { - maxString_ = it->second; - max_ = StringView(maxString_); - } -} - -} // namespace velox -} // namespace facebook diff --git a/velox/vector/SimpleVector.h b/velox/vector/SimpleVector.h index 6512a67b5c08..a0abeb926283 100644 --- a/velox/vector/SimpleVector.h +++ b/velox/vector/SimpleVector.h @@ -38,33 +38,11 @@ namespace exec { class EvalCtx; } -/** - * Adds the given value to the metaData in a proper encoding. Will overwrite - * any existing value in the map. - */ -template -void encodeMetaData( - folly::F14FastMap& metaData, - const std::string& key, - V value) { - metaData.insert_or_assign(key, velox::to(value)); -} - -template <> -inline void encodeMetaData( - folly::F14FastMap& metaData, - const std::string& key, - StringView value) { - metaData.insert_or_assign(key, std::string(value.data(), value.size())); -} - -template <> -inline void encodeMetaData( - folly::F14FastMap& /*metaData*/, - const std::string& /*key*/, - std::shared_ptr /*value*/) { - VELOX_NYI(); -} +template +struct SimpleVectorStats { + std::optional min; + std::optional max; +}; // This class abstracts over various Columnar Storage Formats such that Velox // can select the most appropriate one on a per field / per block basis. @@ -78,15 +56,12 @@ inline void encodeMetaData( template class SimpleVector : public BaseVector { public: - constexpr static auto META_MIN = folly::makeFixedString("CTV1"); - constexpr static auto META_MAX = folly::makeFixedString("CTV2"); - SimpleVector( velox::memory::MemoryPool* pool, std::shared_ptr type, BufferPtr nulls, size_t length, - const folly::F14FastMap& metaData, + const SimpleVectorStats& stats, std::optional distinctValueCount, std::optional nullCount, std::optional isSorted, @@ -102,16 +77,15 @@ class SimpleVector : public BaseVector { representedByteCount, storageByteCount), isSorted_(isSorted), - elementSize_(sizeof(T)) { - setMinMax(metaData); - } + elementSize_(sizeof(T)), + stats_(stats) {} // Constructs SimpleVector inferring the type from T. SimpleVector( velox::memory::MemoryPool* pool, BufferPtr nulls, size_t length, - const folly::F14FastMap& metaData, + const SimpleVectorStats& stats, std::optional distinctValueCount, std::optional nullCount, std::optional isSorted, @@ -122,7 +96,7 @@ class SimpleVector : public BaseVector { CppToType::create(), std::move(nulls), length, - metaData, + stats, distinctValueCount, nullCount, isSorted, @@ -131,15 +105,8 @@ class SimpleVector : public BaseVector { virtual ~SimpleVector() override {} - folly::F14FastMap genMetaData() const { - folly::F14FastMap metaData; - if (min_.hasValue()) { - encodeMetaData(metaData, META_MIN, min_.value()); - } - if (max_.hasValue()) { - encodeMetaData(metaData, META_MAX, max_.value()); - } - return metaData; + SimpleVectorStats getStats() const { + return stats_; } // Concrete Vector types need to implement this themselves. @@ -182,11 +149,11 @@ class SimpleVector : public BaseVector { } const std::optional& getMin() const { - return min_; + return stats_.min; } const std::optional& getMax() const { - return max_; + return stats_.max; } void resize(vector_size_t size, bool setNotNull = true) override { @@ -353,18 +320,6 @@ class SimpleVector : public BaseVector { sizeof(T)); } - std::optional min_; - std::optional max_; - // Holds the data for StringView min/max. - std::string minString_; - std::string maxString_; - - private: - void setMinMax(const folly::F14FastMap& metaData) { - min_ = getMetaDataValue(metaData, META_MIN); - max_ = getMetaDataValue(metaData, META_MAX); - } - protected: int comparePrimitiveAsc(const T& left, const T& right) const { if constexpr (std::is_floating_point::value) { @@ -393,19 +348,9 @@ class SimpleVector : public BaseVector { // If T is StringView, store set of rows // where we have computed asciiness. A set bit means the row was processed. SelectivityVector asciiSetRows_; -}; // namespace velox -template <> -void SimpleVector::setMinMax( - const folly::F14FastMap& metaData); - -template <> -inline void SimpleVector::setMinMax( - const folly::F14FastMap& /*metaData*/) {} - -template <> -inline void SimpleVector>::setMinMax( - const folly::F14FastMap& /*metaData*/) {} + SimpleVectorStats stats_; +}; // namespace velox template <> inline std::optional SimpleVector::compare( diff --git a/velox/vector/arrow/Bridge.cpp b/velox/vector/arrow/Bridge.cpp index f47d2948b2fc..3d9db20693e9 100644 --- a/velox/vector/arrow/Bridge.cpp +++ b/velox/vector/arrow/Bridge.cpp @@ -595,7 +595,7 @@ VectorPtr createFlatVector( length, values, std::vector(), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, std::nullopt, nullCount == -1 ? std::nullopt : std::optional(nullCount)); } @@ -636,7 +636,7 @@ VectorPtr createStringFlatVector( length, stringViews, std::move(stringViewBuffers), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, std::nullopt, nullCount == -1 ? std::nullopt : std::optional(nullCount)); } diff --git a/velox/vector/tests/VectorMaker-inl.h b/velox/vector/tests/VectorMaker-inl.h index 892db0530dab..216379152a86 100644 --- a/velox/vector/tests/VectorMaker-inl.h +++ b/velox/vector/tests/VectorMaker-inl.h @@ -15,23 +15,12 @@ */ #include + +#include "velox/vector/SimpleVector.h" #include "velox/vector/tests/VectorMakerStats.h" namespace facebook::velox::test { -template -folly::F14FastMap getMetadata( - const VectorMakerStats& stats) { - folly::F14FastMap metadata; - if (stats.min.has_value()) { - encodeMetaData(metadata, SimpleVector::META_MIN, stats.min.value()); - } - if (stats.max.has_value()) { - encodeMetaData(metadata, SimpleVector::META_MAX, stats.max.value()); - } - return metadata; -} - template BufferPtr buildBiasedBuffer( const std::vector>& values, @@ -75,9 +64,6 @@ BiasVectorPtr> VectorMaker::biasVector( // Check BiasVector.h for explanation of this calculation. TEvalType bias = min + static_cast(std::ceil(delta / 2.0)); - auto metadata = getMetadata(stats); - encodeMetaData(metadata, BiasVector::BIAS_VALUE, bias); - BufferPtr buffer; TypeKind valueType; @@ -98,7 +84,8 @@ BiasVectorPtr> VectorMaker::biasVector( data.size(), valueType, buffer, - metadata, + bias, + stats.asSimpleVectorStats(), stats.distinctCount(), stats.nullCount, stats.isSorted); @@ -152,7 +139,7 @@ SequenceVectorPtr> VectorMaker::sequenceVector( data.size(), flatVectorNullable(sequenceVals), copyToBuffer(sequenceLengths, pool_), - getMetadata(stats), + stats.asSimpleVectorStats(), stats.distinctCount(), stats.nullCount, stats.isSorted); @@ -177,7 +164,7 @@ ConstantVectorPtr> VectorMaker::constantVector( data.size(), nullCount > 0, (nullCount > 0) ? TEvalType() : folly::copy(*data.front()), - getMetadata(stats)); + stats.asSimpleVectorStats()); } template @@ -216,7 +203,7 @@ DictionaryVectorPtr> VectorMaker::dictionaryVector( std::move(values), TypeKind::INTEGER, std::move(indices), - getMetadata(stats), + stats.asSimpleVectorStats(), indexMap.size(), nullCount, stats.isSorted); @@ -244,7 +231,7 @@ FlatVectorPtr> VectorMaker::flatVectorNullable( data.size(), std::move(dataBuffer), std::vector(), - getMetadata(stats), + stats.asSimpleVectorStats(), stats.distinctCount(), stats.nullCount, stats.isSorted); @@ -275,7 +262,7 @@ FlatVectorPtr> VectorMaker::flatVector( data.size(), std::move(dataBuffer), std::vector(), - getMetadata(stats), + stats.asSimpleVectorStats(), stats.distinctCount(), stats.nullCount, stats.isSorted); diff --git a/velox/vector/tests/VectorMakerStats.h b/velox/vector/tests/VectorMakerStats.h index 967f690ae0ae..7815704e2344 100644 --- a/velox/vector/tests/VectorMakerStats.h +++ b/velox/vector/tests/VectorMakerStats.h @@ -19,6 +19,8 @@ #include #include +#include "velox/vector/SimpleVector.h" + namespace facebook::velox::test { // Struct that caries metadata about a vector of nullable elements. @@ -33,6 +35,10 @@ class VectorMakerStats { return distinctSet_.size(); } + SimpleVectorStats asSimpleVectorStats() { + return {min, max}; + } + std::optional min; std::optional max; size_t nullCount{0}; @@ -44,16 +50,17 @@ class VectorMakerStats { // Generates VectorMakerStats for a given vector of nullable elements. template -VectorMakerStats genVectorMakerStats( +VectorMakerStats::NativeType> genVectorMakerStats( const std::vector>& data) { - VectorMakerStats result; + using NativeType = typename CppToType::NativeType; + VectorMakerStats result; // Count distinct and null elements. for (const auto& val : data) { if (val == std::nullopt) { ++result.nullCount; } else { - result.addElement(*val); + result.addElement(static_cast(*val)); } } @@ -63,10 +70,13 @@ VectorMakerStats genVectorMakerStats( // Calculate min and max (skip null elements). for (const auto& val : data) { if (val != std::nullopt) { - result.min = - (result.min == std::nullopt) ? val : std::min(*result.min, *val); - result.max = - (result.max == std::nullopt) ? val : std::max(*result.max, *val); + auto nativeVal = static_cast(*val); + result.min = (result.min == std::nullopt) + ? nativeVal + : std::min(*result.min, nativeVal); + result.max = (result.max == std::nullopt) + ? nativeVal + : std::max(*result.max, nativeVal); } } return result; @@ -74,19 +84,21 @@ VectorMakerStats genVectorMakerStats( // Generates VectorMakerStats for a given vector of non-nullable elements. template -VectorMakerStats genVectorMakerStats(const std::vector& data) { - VectorMakerStats result; +VectorMakerStats::NativeType> genVectorMakerStats( + const std::vector& data) { + using NativeType = typename CppToType::NativeType; + VectorMakerStats result; for (const auto& val : data) { - result.addElement(val); + result.addElement(static_cast(val)); } result.isSorted = std::is_sorted(data.begin(), data.end()); const auto& [min, max] = std::minmax_element(data.begin(), data.end()); if (min != data.end()) { - result.min = *min; + result.min = static_cast(*min); } if (max != data.end()) { - result.max = *max; + result.max = static_cast(*max); } return result; } diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index 82779c730384..b62764048adb 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -25,6 +25,7 @@ #include "velox/vector/DecodedVector.h" #include "velox/vector/FlatVector.h" #include "velox/vector/LazyVector.h" +#include "velox/vector/SimpleVector.h" #include "velox/vector/TypeAliases.h" #include "velox/vector/VectorTypeUtils.h" #include "velox/vector/tests/VectorMaker.h" @@ -148,15 +149,14 @@ class VectorTest : public testing::Test { rawValues[i] = testValue(i, buffer) - kBias; } } - folly::F14FastMap metadata; - metadata[BiasVector::BIAS_VALUE] = folly::to(kBias); return std::make_shared>( pool_.get(), nulls, size, BiasKind, std::move(values), - std::move(metadata), + kBias, + SimpleVectorStats{}, std::nullopt, numNulls, false, diff --git a/velox/vector/tests/VectorTestBase.h b/velox/vector/tests/VectorTestBase.h index ac347a4aee2c..7e3babc2a2c4 100644 --- a/velox/vector/tests/VectorTestBase.h +++ b/velox/vector/tests/VectorTestBase.h @@ -468,7 +468,7 @@ class VectorTestBase { /*isNull=*/!value.has_value(), CppToType::create(), value ? EvalType(*value) : EvalType(), - cdvi::EMPTY_METADATA, + SimpleVectorStats>{}, sizeof(EvalType)); }