diff --git a/velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h b/velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h index a8473e976bef..31583c447e42 100644 --- a/velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h +++ b/velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h @@ -129,7 +129,7 @@ void SelectiveColumnReader::getFlatValues( true, type, T(), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, sizeof(TVector) * rows.size()); return; } diff --git a/velox/expression/EvalCtx.cpp b/velox/expression/EvalCtx.cpp index 2e42fe997ee7..ec58e4d15836 100644 --- a/velox/expression/EvalCtx.cpp +++ b/velox/expression/EvalCtx.cpp @@ -141,7 +141,7 @@ void EvalCtx::addError( AlignedBuffer::allocate( size, pool(), ErrorVector::value_type()), std::vector(0), - cdvi::EMPTY_METADATA, + SimpleVectorStats>{}, 1 /*distinctValueCount*/, size /*nullCount*/, false /*isSorted*/, diff --git a/velox/type/Type.h b/velox/type/Type.h index 2d8f5e365762..8c105f4a63cf 100644 --- a/velox/type/Type.h +++ b/velox/type/Type.h @@ -1564,40 +1564,39 @@ static inline int32_t sizeOfTypeKind(TypeKind kind) { return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(sizeOfTypeKindHelper, kind); } -// Helper for using folly::to with StringView. template -static inline T to(U value) { +static inline T to(const U& value) { return folly::to(value); } template <> -inline Timestamp to(std::string value) { +inline Timestamp to(const std::string& value) { return Timestamp(0, 0); } template <> -inline UnknownValue to(std::string /* value */) { +inline UnknownValue to(const std::string& /* value */) { return UnknownValue(); } template <> -inline std::string to(Timestamp value) { +inline std::string to(const Timestamp& value) { return value.toString(); } template <> -inline std::string to(velox::StringView value) { +inline std::string to(const velox::StringView& value) { return std::string(value.data(), value.size()); } template <> -inline std::string to(ComplexType value) { +inline std::string to(const ComplexType& value) { return std::string("ComplexType"); } template <> -inline ComplexType to(std::string value) { - return ComplexType(); +inline velox::StringView to(const std::string& value) { + return velox::StringView(value); } namespace exec { diff --git a/velox/vector/BaseVector.cpp b/velox/vector/BaseVector.cpp index f468fc1bd11b..8614af4b305d 100644 --- a/velox/vector/BaseVector.cpp +++ b/velox/vector/BaseVector.cpp @@ -111,7 +111,7 @@ static VectorPtr addDictionary( std::move(vector), TypeKind::INTEGER, std::move(indices), - cdvi::EMPTY_METADATA, + SimpleVectorStats::WrapperType>{}, indicesBuffer->size() / sizeof(vector_size_t) /*distinctValueCount*/, base->getNullCount(), false /*isSorted*/, @@ -144,7 +144,7 @@ addSequence(BufferPtr lengths, vector_size_t size, VectorPtr vector) { size, std::move(vector), std::move(lengths), - cdvi::EMPTY_METADATA, + SimpleVectorStats::WrapperType>{}, std::nullopt /*distinctCount*/, std::nullopt, false /*sorted*/, @@ -177,7 +177,7 @@ addConstant(vector_size_t size, vector_size_t index, VectorPtr vector) { auto singleNull = BaseVector::create(vector->type(), 1, pool); singleNull->setNull(0, true); return std::make_shared>( - pool, size, 0, singleNull, cdvi::EMPTY_METADATA); + pool, size, 0, singleNull, SimpleVectorStats{}); } else { return std::make_shared>( pool, size, true, vector->type(), T()); @@ -207,7 +207,7 @@ addConstant(vector_size_t size, vector_size_t index, VectorPtr vector) { } return std::make_shared>( - pool, size, index, std::move(vector), cdvi::EMPTY_METADATA); + pool, size, index, std::move(vector), SimpleVectorStats{}); } // static @@ -234,7 +234,7 @@ static VectorPtr createEmpty( size, std::move(values), std::vector(), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, 0 /*distinctValueCount*/, 0 /*nullCount*/, false /*isSorted*/, @@ -332,7 +332,7 @@ VectorPtr BaseVector::create( size, BufferPtr(nullptr), std::vector(), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, 1 /*distinctValueCount*/, size /*nullCount*/, true /*isSorted*/, @@ -539,7 +539,7 @@ VectorPtr newConstant( value.isNull(), Type::create(), std::move(copy), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, sizeof(T) /*representedByteCount*/); } @@ -557,7 +557,7 @@ VectorPtr newConstant( value.isNull(), capsule.type, std::shared_ptr(capsule.obj), - cdvi::EMPTY_METADATA, + SimpleVectorStats>{}, sizeof(std::shared_ptr) /*representedByteCount*/); } diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index fff18c324ef8..07ab34b37164 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -43,10 +43,6 @@ namespace facebook { namespace velox { -namespace cdvi { -const folly::F14FastMap EMPTY_METADATA; -} // namespace cdvi - template class SimpleVector; diff --git a/velox/vector/BiasVector-inl.h b/velox/vector/BiasVector-inl.h index 16389b87bf88..eb19005f5d3e 100644 --- a/velox/vector/BiasVector-inl.h +++ b/velox/vector/BiasVector-inl.h @@ -46,8 +46,6 @@ static inline __m256i simdAdd(const __m256i& left, const __m256i& right) { * 2 buffers. * * Buffer order: [nullData, valueData] - * - * The bias value is stored in the metaData block via the BIAS_VALUE key. */ template BiasVector::BiasVector( @@ -56,7 +54,8 @@ BiasVector::BiasVector( size_t length, TypeKind valueType, BufferPtr values, - const folly::F14FastMap& metaData, + T bias, + const SimpleVectorStats& stats, std::optional distinctCount, std::optional nullCount, std::optional sorted, @@ -66,23 +65,20 @@ BiasVector::BiasVector( pool, nulls, length, - metaData, + stats, distinctCount, nullCount, sorted, representedBytes, storageByteCount), valueType_(valueType), - values_(std::move(values)) { + values_(std::move(values)), + bias_(bias) { VELOX_CHECK( valueType_ == TypeKind::INTEGER || valueType_ == TypeKind::SMALLINT || valueType_ == TypeKind::TINYINT, "Invalid array type for biased values"); - auto bias = - SimpleVector::template getMetaDataValue(metaData, BIAS_VALUE); - VELOX_CHECK(bias.has_value(), "Bias value is required"); - bias_ = bias.value(); biasBuffer_ = simd::setAll256i(bias_); switch (valueType_) { @@ -122,7 +118,7 @@ std::unique_ptr> BiasVector::hashAll() const { BaseVector::length_, hashes, std::vector(0) /*stringBuffers*/, - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, std::nullopt /*distinctValueCount*/, 0 /* nullCount */, false /*isSorted*/, diff --git a/velox/vector/BiasVector.h b/velox/vector/BiasVector.h index 3e18f1e95b8c..b43efe130e04 100644 --- a/velox/vector/BiasVector.h +++ b/velox/vector/BiasVector.h @@ -92,7 +92,6 @@ class BiasVector : public SimpleVector { // bias"); public: - static const std::string BIAS_VALUE; static constexpr bool can_simd = (std::is_same::value || std::is_same::value || std::is_same::value); @@ -103,8 +102,8 @@ class BiasVector : public SimpleVector { size_t length, TypeKind valueType, BufferPtr values, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + T bias, + const SimpleVectorStats& stats = {}, std::optional distinctCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional sorted = std::nullopt, @@ -256,9 +255,6 @@ class BiasVector : public SimpleVector { __m256i biasBuffer_; }; -template -const std::string BiasVector::BIAS_VALUE = "CDV1"; - template using BiasVectorPtr = std::shared_ptr>; diff --git a/velox/vector/CMakeLists.txt b/velox/vector/CMakeLists.txt index ecd845fe7f0d..5355b603a0b5 100644 --- a/velox/vector/CMakeLists.txt +++ b/velox/vector/CMakeLists.txt @@ -21,7 +21,6 @@ add_library( LazyVector.cpp SelectivityVector.cpp SequenceVector.cpp - SimpleVector.cpp VectorEncoding.cpp VectorStream.cpp) diff --git a/velox/vector/ConstantVector-inl.h b/velox/vector/ConstantVector-inl.h index 610f215e29ac..5f960e5ec77a 100644 --- a/velox/vector/ConstantVector-inl.h +++ b/velox/vector/ConstantVector-inl.h @@ -24,7 +24,7 @@ std::unique_ptr> ConstantVector::hashAll() const { BaseVector::length_, false /* isNull */, this->hashValueAt(0), - folly::F14FastMap(), + SimpleVectorStats{}, /* stats */ sizeof(uint64_t) * BaseVector::length_ /* representedBytes */); } diff --git a/velox/vector/ConstantVector.cpp b/velox/vector/ConstantVector.cpp index bf10c36525a8..aeb38b893fe0 100644 --- a/velox/vector/ConstantVector.cpp +++ b/velox/vector/ConstantVector.cpp @@ -36,5 +36,11 @@ void ConstantVector>::setValue( VELOX_NYI(); } +template <> +void ConstantVector::setValue(const std::string& /*string*/) { + VELOX_UNSUPPORTED( + "ConstantVectors of ComplexType cannot be initialized from string values."); +} + } // namespace velox } // namespace facebook diff --git a/velox/vector/ConstantVector.h b/velox/vector/ConstantVector.h index 69966805471c..581c0ecb203c 100644 --- a/velox/vector/ConstantVector.h +++ b/velox/vector/ConstantVector.h @@ -45,8 +45,7 @@ class ConstantVector final : public SimpleVector { size_t length, bool isNull, T&& val, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional representedBytes = std::nullopt, std::optional storageByteCount = std::nullopt) : ConstantVector( @@ -55,7 +54,7 @@ class ConstantVector final : public SimpleVector { isNull, CppToType::create(), std::move(val), - metaData, + stats, representedBytes, storageByteCount) {} @@ -65,8 +64,7 @@ class ConstantVector final : public SimpleVector { bool isNull, std::shared_ptr type, T&& val, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional representedBytes = std::nullopt, std::optional storageByteCount = std::nullopt) : SimpleVector( @@ -74,7 +72,7 @@ class ConstantVector final : public SimpleVector { type, BufferPtr(nullptr), length, - metaData, + stats, isNull ? 0 : 1 /* distinctValueCount */, isNull ? length : 0 /* nullCount */, true /*isSorted*/, @@ -114,14 +112,13 @@ class ConstantVector final : public SimpleVector { vector_size_t length, vector_size_t index, VectorPtr base, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA) + const SimpleVectorStats& stats = {}) : SimpleVector( pool, base->type(), BufferPtr(nullptr), length, - metaData, + stats, std::nullopt, std::nullopt, true /*isSorted*/, @@ -413,6 +410,9 @@ void ConstantVector::setValue(const std::string& string); template <> void ConstantVector>::setValue(const std::string& string); +template <> +void ConstantVector::setValue(const std::string& string); + template using ConstantVectorPtr = std::shared_ptr>; diff --git a/velox/vector/DictionaryVector-inl.h b/velox/vector/DictionaryVector-inl.h index 1aa300b24050..b64177519471 100644 --- a/velox/vector/DictionaryVector-inl.h +++ b/velox/vector/DictionaryVector-inl.h @@ -55,7 +55,7 @@ DictionaryVector::DictionaryVector( std::shared_ptr dictionaryValues, TypeKind indexType, BufferPtr dictionaryIndices, - const folly::F14FastMap& metaData, + const SimpleVectorStats& stats, std::optional distinctValueCount, std::optional nullCount, std::optional isSorted, @@ -66,13 +66,13 @@ DictionaryVector::DictionaryVector( dictionaryValues->type(), nulls, length, - metaData, + stats, distinctValueCount, nullCount, isSorted, representedBytes, storageByteCount), - dictionaryMetaData_(metaData) { + dictionaryStats_(stats) { VELOX_CHECK(dictionaryValues != nullptr, "dictionaryValues must not be null"); VELOX_CHECK( dictionaryIndices != nullptr, "dictionaryIndices must not be null"); @@ -139,7 +139,7 @@ std::unique_ptr> DictionaryVector::hashAll() const { BaseVector::length_, hashes, std::vector(0) /* stringBuffers */, - folly::F14FastMap(), + SimpleVectorStats{}, BaseVector::distinctValueCount_.value() + (BaseVector::nullCount_.value_or(0) > 0 ? 1 : 0), 0 /* nullCount */, diff --git a/velox/vector/DictionaryVector.h b/velox/vector/DictionaryVector.h index 378539fba4a3..26603079805e 100644 --- a/velox/vector/DictionaryVector.h +++ b/velox/vector/DictionaryVector.h @@ -55,8 +55,7 @@ class DictionaryVector : public SimpleVector { VectorPtr dictionaryValues, TypeKind indexTypeKind, BufferPtr dictionaryIndexArray, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional distinctValueCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional isSorted = std::nullopt, @@ -114,13 +113,12 @@ class DictionaryVector : public SimpleVector { } /** - * @return metadata for the internal dictionary value vector. They + * @return stats for the internal dictionary value vector. They * hold the min and max value in the dictionary. */ // TODO (T61713241): Remove this later. - inline const folly::F14FastMap& - getDictionaryMetaData() const { - return dictionaryMetaData_; + inline const SimpleVectorStats& getDictionaryStats() const { + return dictionaryStats_; } inline const BufferPtr& indices() const { @@ -238,8 +236,8 @@ class DictionaryVector : public SimpleVector { void setInternalState(); - // metadata over the contained vector data - folly::F14FastMap dictionaryMetaData_; + // stats over the contained vector data + SimpleVectorStats dictionaryStats_; // the dictionary indices of the vector can be variable types depending on the // size of the dictionary - kept as original and typed diff --git a/velox/vector/FlatVector-inl.h b/velox/vector/FlatVector-inl.h index b0049fd1f0eb..33fc906d0daf 100644 --- a/velox/vector/FlatVector-inl.h +++ b/velox/vector/FlatVector-inl.h @@ -111,7 +111,7 @@ std::unique_ptr> FlatVector::hashAll() const { BaseVector::length_, std::move(hashBuffer), std::vector() /*stringBuffers*/, - folly::F14FastMap(), + SimpleVectorStats{}, /* stats */ std::nullopt /*distinctValueCount*/, 0 /*nullCount*/, false /*sorted*/, diff --git a/velox/vector/FlatVector.h b/velox/vector/FlatVector.h index 395a041a5b00..1d06b04fb6e7 100644 --- a/velox/vector/FlatVector.h +++ b/velox/vector/FlatVector.h @@ -57,8 +57,7 @@ class FlatVector final : public SimpleVector { size_t length, BufferPtr values, std::vector&& stringBuffers, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional distinctValueCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional isSorted = std::nullopt, @@ -71,7 +70,7 @@ class FlatVector final : public SimpleVector { length, values, std::move(stringBuffers), - metaData, + stats, distinctValueCount, nullCount, isSorted, @@ -85,8 +84,7 @@ class FlatVector final : public SimpleVector { size_t length, BufferPtr values, std::vector&& stringBuffers, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional distinctValueCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional isSorted = std::nullopt, @@ -97,7 +95,7 @@ class FlatVector final : public SimpleVector { type, std::move(nulls), length, - metaData, + stats, distinctValueCount, nullCount, isSorted, diff --git a/velox/vector/SequenceVector-inl.h b/velox/vector/SequenceVector-inl.h index be22c6e780d3..39ef8399a9ad 100644 --- a/velox/vector/SequenceVector-inl.h +++ b/velox/vector/SequenceVector-inl.h @@ -27,7 +27,7 @@ SequenceVector::SequenceVector( size_t length, std::shared_ptr sequenceValues, BufferPtr sequenceLengths, - const folly::F14FastMap& metaData, + const SimpleVectorStats& stats, std::optional distinctCount, std::optional nullCount, std::optional isSorted, @@ -38,7 +38,7 @@ SequenceVector::SequenceVector( sequenceValues->type(), BufferPtr(nullptr), length, - metaData, + stats, distinctCount, nullCount, isSorted, @@ -102,7 +102,7 @@ std::unique_ptr> SequenceVector::hashAll() const { sequenceCount, hashes, std::vector(0) /*stringBuffers*/, - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, /*stats*/ std::nullopt /*distinctValueCount*/, 0 /* nullCount */, false /* sorted */, @@ -113,7 +113,7 @@ std::unique_ptr> SequenceVector::hashAll() const { BaseVector::length_, std::move(hashValues), sequenceLengths_, - folly::F14FastMap(), + SimpleVectorStats{}, /*stats*/ std::nullopt /*distinctCount*/, 0 /* nullCount */, false /* sorted */, diff --git a/velox/vector/SequenceVector.h b/velox/vector/SequenceVector.h index 02bc2ba37868..83f60f031e9d 100644 --- a/velox/vector/SequenceVector.h +++ b/velox/vector/SequenceVector.h @@ -44,8 +44,7 @@ class SequenceVector : public SimpleVector { size_t length, std::shared_ptr sequenceValues, BufferPtr sequenceLengths, - const folly::F14FastMap& metaData = - cdvi::EMPTY_METADATA, + const SimpleVectorStats& stats = {}, std::optional distinctCount = std::nullopt, std::optional nullCount = std::nullopt, std::optional sorted = std::nullopt, diff --git a/velox/vector/SimpleVector.cpp b/velox/vector/SimpleVector.cpp deleted file mode 100644 index a037e3e1d7c9..000000000000 --- a/velox/vector/SimpleVector.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/vector/SimpleVector.h" -#include "velox/common/base/Exceptions.h" - -namespace facebook { -namespace velox { - -template <> -void SimpleVector::setMinMax( - const folly::F14FastMap& metaData) { - auto it = metaData.find(META_MIN); - if (it != metaData.end()) { - minString_ = it->second; - min_ = StringView(minString_); - } - it = metaData.find(META_MAX); - if (it != metaData.end()) { - maxString_ = it->second; - max_ = StringView(maxString_); - } -} - -} // namespace velox -} // namespace facebook diff --git a/velox/vector/SimpleVector.h b/velox/vector/SimpleVector.h index 6512a67b5c08..a0abeb926283 100644 --- a/velox/vector/SimpleVector.h +++ b/velox/vector/SimpleVector.h @@ -38,33 +38,11 @@ namespace exec { class EvalCtx; } -/** - * Adds the given value to the metaData in a proper encoding. Will overwrite - * any existing value in the map. - */ -template -void encodeMetaData( - folly::F14FastMap& metaData, - const std::string& key, - V value) { - metaData.insert_or_assign(key, velox::to(value)); -} - -template <> -inline void encodeMetaData( - folly::F14FastMap& metaData, - const std::string& key, - StringView value) { - metaData.insert_or_assign(key, std::string(value.data(), value.size())); -} - -template <> -inline void encodeMetaData( - folly::F14FastMap& /*metaData*/, - const std::string& /*key*/, - std::shared_ptr /*value*/) { - VELOX_NYI(); -} +template +struct SimpleVectorStats { + std::optional min; + std::optional max; +}; // This class abstracts over various Columnar Storage Formats such that Velox // can select the most appropriate one on a per field / per block basis. @@ -78,15 +56,12 @@ inline void encodeMetaData( template class SimpleVector : public BaseVector { public: - constexpr static auto META_MIN = folly::makeFixedString("CTV1"); - constexpr static auto META_MAX = folly::makeFixedString("CTV2"); - SimpleVector( velox::memory::MemoryPool* pool, std::shared_ptr type, BufferPtr nulls, size_t length, - const folly::F14FastMap& metaData, + const SimpleVectorStats& stats, std::optional distinctValueCount, std::optional nullCount, std::optional isSorted, @@ -102,16 +77,15 @@ class SimpleVector : public BaseVector { representedByteCount, storageByteCount), isSorted_(isSorted), - elementSize_(sizeof(T)) { - setMinMax(metaData); - } + elementSize_(sizeof(T)), + stats_(stats) {} // Constructs SimpleVector inferring the type from T. SimpleVector( velox::memory::MemoryPool* pool, BufferPtr nulls, size_t length, - const folly::F14FastMap& metaData, + const SimpleVectorStats& stats, std::optional distinctValueCount, std::optional nullCount, std::optional isSorted, @@ -122,7 +96,7 @@ class SimpleVector : public BaseVector { CppToType::create(), std::move(nulls), length, - metaData, + stats, distinctValueCount, nullCount, isSorted, @@ -131,15 +105,8 @@ class SimpleVector : public BaseVector { virtual ~SimpleVector() override {} - folly::F14FastMap genMetaData() const { - folly::F14FastMap metaData; - if (min_.hasValue()) { - encodeMetaData(metaData, META_MIN, min_.value()); - } - if (max_.hasValue()) { - encodeMetaData(metaData, META_MAX, max_.value()); - } - return metaData; + SimpleVectorStats getStats() const { + return stats_; } // Concrete Vector types need to implement this themselves. @@ -182,11 +149,11 @@ class SimpleVector : public BaseVector { } const std::optional& getMin() const { - return min_; + return stats_.min; } const std::optional& getMax() const { - return max_; + return stats_.max; } void resize(vector_size_t size, bool setNotNull = true) override { @@ -353,18 +320,6 @@ class SimpleVector : public BaseVector { sizeof(T)); } - std::optional min_; - std::optional max_; - // Holds the data for StringView min/max. - std::string minString_; - std::string maxString_; - - private: - void setMinMax(const folly::F14FastMap& metaData) { - min_ = getMetaDataValue(metaData, META_MIN); - max_ = getMetaDataValue(metaData, META_MAX); - } - protected: int comparePrimitiveAsc(const T& left, const T& right) const { if constexpr (std::is_floating_point::value) { @@ -393,19 +348,9 @@ class SimpleVector : public BaseVector { // If T is StringView, store set of rows // where we have computed asciiness. A set bit means the row was processed. SelectivityVector asciiSetRows_; -}; // namespace velox -template <> -void SimpleVector::setMinMax( - const folly::F14FastMap& metaData); - -template <> -inline void SimpleVector::setMinMax( - const folly::F14FastMap& /*metaData*/) {} - -template <> -inline void SimpleVector>::setMinMax( - const folly::F14FastMap& /*metaData*/) {} + SimpleVectorStats stats_; +}; // namespace velox template <> inline std::optional SimpleVector::compare( diff --git a/velox/vector/arrow/Bridge.cpp b/velox/vector/arrow/Bridge.cpp index f47d2948b2fc..3d9db20693e9 100644 --- a/velox/vector/arrow/Bridge.cpp +++ b/velox/vector/arrow/Bridge.cpp @@ -595,7 +595,7 @@ VectorPtr createFlatVector( length, values, std::vector(), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, std::nullopt, nullCount == -1 ? std::nullopt : std::optional(nullCount)); } @@ -636,7 +636,7 @@ VectorPtr createStringFlatVector( length, stringViews, std::move(stringViewBuffers), - cdvi::EMPTY_METADATA, + SimpleVectorStats{}, std::nullopt, nullCount == -1 ? std::nullopt : std::optional(nullCount)); } diff --git a/velox/vector/tests/VectorMaker-inl.h b/velox/vector/tests/VectorMaker-inl.h index 892db0530dab..216379152a86 100644 --- a/velox/vector/tests/VectorMaker-inl.h +++ b/velox/vector/tests/VectorMaker-inl.h @@ -15,23 +15,12 @@ */ #include + +#include "velox/vector/SimpleVector.h" #include "velox/vector/tests/VectorMakerStats.h" namespace facebook::velox::test { -template -folly::F14FastMap getMetadata( - const VectorMakerStats& stats) { - folly::F14FastMap metadata; - if (stats.min.has_value()) { - encodeMetaData(metadata, SimpleVector::META_MIN, stats.min.value()); - } - if (stats.max.has_value()) { - encodeMetaData(metadata, SimpleVector::META_MAX, stats.max.value()); - } - return metadata; -} - template BufferPtr buildBiasedBuffer( const std::vector>& values, @@ -75,9 +64,6 @@ BiasVectorPtr> VectorMaker::biasVector( // Check BiasVector.h for explanation of this calculation. TEvalType bias = min + static_cast(std::ceil(delta / 2.0)); - auto metadata = getMetadata(stats); - encodeMetaData(metadata, BiasVector::BIAS_VALUE, bias); - BufferPtr buffer; TypeKind valueType; @@ -98,7 +84,8 @@ BiasVectorPtr> VectorMaker::biasVector( data.size(), valueType, buffer, - metadata, + bias, + stats.asSimpleVectorStats(), stats.distinctCount(), stats.nullCount, stats.isSorted); @@ -152,7 +139,7 @@ SequenceVectorPtr> VectorMaker::sequenceVector( data.size(), flatVectorNullable(sequenceVals), copyToBuffer(sequenceLengths, pool_), - getMetadata(stats), + stats.asSimpleVectorStats(), stats.distinctCount(), stats.nullCount, stats.isSorted); @@ -177,7 +164,7 @@ ConstantVectorPtr> VectorMaker::constantVector( data.size(), nullCount > 0, (nullCount > 0) ? TEvalType() : folly::copy(*data.front()), - getMetadata(stats)); + stats.asSimpleVectorStats()); } template @@ -216,7 +203,7 @@ DictionaryVectorPtr> VectorMaker::dictionaryVector( std::move(values), TypeKind::INTEGER, std::move(indices), - getMetadata(stats), + stats.asSimpleVectorStats(), indexMap.size(), nullCount, stats.isSorted); @@ -244,7 +231,7 @@ FlatVectorPtr> VectorMaker::flatVectorNullable( data.size(), std::move(dataBuffer), std::vector(), - getMetadata(stats), + stats.asSimpleVectorStats(), stats.distinctCount(), stats.nullCount, stats.isSorted); @@ -275,7 +262,7 @@ FlatVectorPtr> VectorMaker::flatVector( data.size(), std::move(dataBuffer), std::vector(), - getMetadata(stats), + stats.asSimpleVectorStats(), stats.distinctCount(), stats.nullCount, stats.isSorted); diff --git a/velox/vector/tests/VectorMakerStats.h b/velox/vector/tests/VectorMakerStats.h index 967f690ae0ae..7815704e2344 100644 --- a/velox/vector/tests/VectorMakerStats.h +++ b/velox/vector/tests/VectorMakerStats.h @@ -19,6 +19,8 @@ #include #include +#include "velox/vector/SimpleVector.h" + namespace facebook::velox::test { // Struct that caries metadata about a vector of nullable elements. @@ -33,6 +35,10 @@ class VectorMakerStats { return distinctSet_.size(); } + SimpleVectorStats asSimpleVectorStats() { + return {min, max}; + } + std::optional min; std::optional max; size_t nullCount{0}; @@ -44,16 +50,17 @@ class VectorMakerStats { // Generates VectorMakerStats for a given vector of nullable elements. template -VectorMakerStats genVectorMakerStats( +VectorMakerStats::NativeType> genVectorMakerStats( const std::vector>& data) { - VectorMakerStats result; + using NativeType = typename CppToType::NativeType; + VectorMakerStats result; // Count distinct and null elements. for (const auto& val : data) { if (val == std::nullopt) { ++result.nullCount; } else { - result.addElement(*val); + result.addElement(static_cast(*val)); } } @@ -63,10 +70,13 @@ VectorMakerStats genVectorMakerStats( // Calculate min and max (skip null elements). for (const auto& val : data) { if (val != std::nullopt) { - result.min = - (result.min == std::nullopt) ? val : std::min(*result.min, *val); - result.max = - (result.max == std::nullopt) ? val : std::max(*result.max, *val); + auto nativeVal = static_cast(*val); + result.min = (result.min == std::nullopt) + ? nativeVal + : std::min(*result.min, nativeVal); + result.max = (result.max == std::nullopt) + ? nativeVal + : std::max(*result.max, nativeVal); } } return result; @@ -74,19 +84,21 @@ VectorMakerStats genVectorMakerStats( // Generates VectorMakerStats for a given vector of non-nullable elements. template -VectorMakerStats genVectorMakerStats(const std::vector& data) { - VectorMakerStats result; +VectorMakerStats::NativeType> genVectorMakerStats( + const std::vector& data) { + using NativeType = typename CppToType::NativeType; + VectorMakerStats result; for (const auto& val : data) { - result.addElement(val); + result.addElement(static_cast(val)); } result.isSorted = std::is_sorted(data.begin(), data.end()); const auto& [min, max] = std::minmax_element(data.begin(), data.end()); if (min != data.end()) { - result.min = *min; + result.min = static_cast(*min); } if (max != data.end()) { - result.max = *max; + result.max = static_cast(*max); } return result; } diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index 82779c730384..b62764048adb 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -25,6 +25,7 @@ #include "velox/vector/DecodedVector.h" #include "velox/vector/FlatVector.h" #include "velox/vector/LazyVector.h" +#include "velox/vector/SimpleVector.h" #include "velox/vector/TypeAliases.h" #include "velox/vector/VectorTypeUtils.h" #include "velox/vector/tests/VectorMaker.h" @@ -148,15 +149,14 @@ class VectorTest : public testing::Test { rawValues[i] = testValue(i, buffer) - kBias; } } - folly::F14FastMap metadata; - metadata[BiasVector::BIAS_VALUE] = folly::to(kBias); return std::make_shared>( pool_.get(), nulls, size, BiasKind, std::move(values), - std::move(metadata), + kBias, + SimpleVectorStats{}, std::nullopt, numNulls, false, diff --git a/velox/vector/tests/VectorTestBase.h b/velox/vector/tests/VectorTestBase.h index ac347a4aee2c..7e3babc2a2c4 100644 --- a/velox/vector/tests/VectorTestBase.h +++ b/velox/vector/tests/VectorTestBase.h @@ -468,7 +468,7 @@ class VectorTestBase { /*isNull=*/!value.has_value(), CppToType::create(), value ? EvalType(*value) : EvalType(), - cdvi::EMPTY_METADATA, + SimpleVectorStats>{}, sizeof(EvalType)); }