Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove min/max from SimpleVector #1239

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions velox/vector/BiasVector-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ BiasVector<T>::BiasVector(
size_t length,
TypeKind valueType,
BufferPtr values,
T&& bias,
const folly::F14FastMap<std::string, std::string>& metaData,
std::optional<vector_size_t> distinctCount,
std::optional<vector_size_t> nullCount,
Expand All @@ -73,16 +74,12 @@ BiasVector<T>::BiasVector(
representedBytes,
storageByteCount),
valueType_(valueType),
values_(std::move(values)) {
values_(std::move(values)),
bias_(std::move(bias)) {
VELOX_CHECK(
valueType_ == TypeKind::INTEGER || valueType_ == TypeKind::SMALLINT ||
valueType_ == TypeKind::TINYINT,
"Invalid array type for biased values");

auto bias =
SimpleVector<T>::template getMetaDataValue<T>(metaData, BIAS_VALUE);
VELOX_CHECK(bias.has_value(), "Bias value is required");
bias_ = bias.value();
biasBuffer_ = simd::setAll256i(bias_);

switch (valueType_) {
Expand Down
5 changes: 1 addition & 4 deletions velox/vector/BiasVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ class BiasVector : public SimpleVector<T> {
// bias");

public:
static const std::string BIAS_VALUE;
static constexpr bool can_simd =
(std::is_same<T, int64_t>::value || std::is_same<T, int32_t>::value ||
std::is_same<T, int16_t>::value);
Expand All @@ -103,6 +102,7 @@ class BiasVector : public SimpleVector<T> {
size_t length,
TypeKind valueType,
BufferPtr values,
T&& bias,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA,
std::optional<int32_t> distinctCount = std::nullopt,
Expand Down Expand Up @@ -256,9 +256,6 @@ class BiasVector : public SimpleVector<T> {
__m256i biasBuffer_;
};

template <typename T>
const std::string BiasVector<T>::BIAS_VALUE = "CDV1";

template <typename T>
using BiasVectorPtr = std::shared_ptr<BiasVector<T>>;

Expand Down
1 change: 0 additions & 1 deletion velox/vector/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ add_library(
LazyVector.cpp
SelectivityVector.cpp
SequenceVector.cpp
SimpleVector.cpp
VectorEncoding.cpp
VectorStream.cpp)

Expand Down
39 changes: 0 additions & 39 deletions velox/vector/SimpleVector.cpp

This file was deleted.

77 changes: 1 addition & 76 deletions velox/vector/SimpleVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,34 +37,6 @@ namespace exec {
class EvalCtx;
}

/**
* Adds the given value to the metaData in a proper encoding. Will overwrite
* any existing value in the map.
*/
template <typename V>
void encodeMetaData(
folly::F14FastMap<std::string, std::string>& metaData,
const std::string& key,
V value) {
metaData.insert_or_assign(key, velox::to<std::string>(value));
}

template <>
inline void encodeMetaData(
folly::F14FastMap<std::string, std::string>& metaData,
const std::string& key,
StringView value) {
metaData.insert_or_assign(key, std::string(value.data(), value.size()));
}

template <>
inline void encodeMetaData(
folly::F14FastMap<std::string, std::string>& /*metaData*/,
const std::string& /*key*/,
std::shared_ptr<void> /*value*/) {
VELOX_NYI();
}

// This class abstracts over various Columnar Storage Formats such that Velox
// can select the most appropriate one on a per field / per block basis.
// The goal is to use the most appropriate type to optimize for:
Expand All @@ -77,9 +49,6 @@ inline void encodeMetaData(
template <typename T>
class SimpleVector : public BaseVector {
public:
constexpr static auto META_MIN = folly::makeFixedString("CTV1");
constexpr static auto META_MAX = folly::makeFixedString("CTV2");

SimpleVector(
velox::memory::MemoryPool* pool,
std::shared_ptr<const Type> type,
Expand All @@ -101,9 +70,7 @@ class SimpleVector : public BaseVector {
representedByteCount,
storageByteCount),
isSorted_(isSorted),
elementSize_(sizeof(T)) {
setMinMax(metaData);
}
elementSize_(sizeof(T)) {}

// Constructs SimpleVector inferring the type from T.
SimpleVector(
Expand All @@ -130,17 +97,6 @@ class SimpleVector : public BaseVector {

virtual ~SimpleVector() override {}

folly::F14FastMap<std::string, std::string> genMetaData() const {
folly::F14FastMap<std::string, std::string> metaData;
if (min_.hasValue()) {
encodeMetaData(metaData, META_MIN, min_.value());
}
if (max_.hasValue()) {
encodeMetaData(metaData, META_MAX, max_.value());
}
return metaData;
}

// Concrete Vector types need to implement this themselves.
// This method does not do bounds checking. When the value is null the return
// value is technically undefined (currently implemented as default of T)
Expand Down Expand Up @@ -208,14 +164,6 @@ class SimpleVector : public BaseVector {
return isSorted_;
}

const std::optional<T>& getMin() const {
return min_;
}

const std::optional<T>& getMax() const {
return max_;
}

void resize(vector_size_t size, bool setNotNull = true) override {
VELOX_CHECK(false, "Can only resize flat vectors.");
}
Expand Down Expand Up @@ -380,18 +328,7 @@ class SimpleVector : public BaseVector {
sizeof(T));
}

std::optional<T> min_;
std::optional<T> max_;
// Holds the data for StringView min/max.
std::string minString_;
std::string maxString_;

private:
void setMinMax(const folly::F14FastMap<std::string, std::string>& metaData) {
min_ = getMetaDataValue<T>(metaData, META_MIN);
max_ = getMetaDataValue<T>(metaData, META_MAX);
}

int comparePrimitiveAsc(const T& left, const T& right) const {
if constexpr (std::is_floating_point<T>::value) {
bool isLeftNan = std::isnan(left);
Expand Down Expand Up @@ -422,18 +359,6 @@ class SimpleVector : public BaseVector {
SelectivityVector asciiSetRows_;
}; // namespace velox

template <>
void SimpleVector<StringView>::setMinMax(
const folly::F14FastMap<std::string, std::string>& metaData);

template <>
inline void SimpleVector<ComplexType>::setMinMax(
const folly::F14FastMap<std::string, std::string>& /*metaData*/) {}

template <>
inline void SimpleVector<std::shared_ptr<void>>::setMinMax(
const folly::F14FastMap<std::string, std::string>& /*metaData*/) {}

template <>
inline bool SimpleVector<ComplexType>::equalValueAt(
const BaseVector* other,
Expand Down
30 changes: 7 additions & 23 deletions velox/vector/tests/VectorMaker-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,6 @@
#include "velox/vector/tests/VectorMakerStats.h"

namespace facebook::velox::test {

template <typename T>
folly::F14FastMap<std::string, std::string> getMetadata(
const VectorMakerStats<T>& stats) {
folly::F14FastMap<std::string, std::string> metadata;
if (stats.min.has_value()) {
encodeMetaData(metadata, SimpleVector<T>::META_MIN, stats.min.value());
}
if (stats.max.has_value()) {
encodeMetaData(metadata, SimpleVector<T>::META_MAX, stats.max.value());
}
return metadata;
}

template <typename T, typename ShortType>
BufferPtr buildBiasedBuffer(
const std::vector<std::optional<T>>& values,
Expand Down Expand Up @@ -75,9 +61,6 @@ BiasVectorPtr<VectorMaker::EvalType<T>> VectorMaker::biasVector(
// Check BiasVector.h for explanation of this calculation.
TEvalType bias = min + static_cast<TEvalType>(std::ceil(delta / 2.0));

auto metadata = getMetadata(stats);
encodeMetaData(metadata, BiasVector<TEvalType>::BIAS_VALUE, bias);

BufferPtr buffer;
TypeKind valueType;

Expand All @@ -98,7 +81,8 @@ BiasVectorPtr<VectorMaker::EvalType<T>> VectorMaker::biasVector(
data.size(),
valueType,
buffer,
metadata,
std::move(bias),
cdvi::EMPTY_METADATA,
stats.distinctCount(),
stats.nullCount,
stats.isSorted);
Expand Down Expand Up @@ -152,7 +136,7 @@ SequenceVectorPtr<VectorMaker::EvalType<T>> VectorMaker::sequenceVector(
data.size(),
flatVectorNullable(sequenceVals),
copyToBuffer(sequenceLengths, pool_),
getMetadata(stats),
cdvi::EMPTY_METADATA,
stats.distinctCount(),
stats.nullCount,
stats.isSorted);
Expand All @@ -177,7 +161,7 @@ ConstantVectorPtr<VectorMaker::EvalType<T>> VectorMaker::constantVector(
data.size(),
nullCount > 0,
(nullCount > 0) ? TEvalType() : folly::copy(*data.front()),
getMetadata(stats));
cdvi::EMPTY_METADATA);
}

template <typename T>
Expand Down Expand Up @@ -216,7 +200,7 @@ DictionaryVectorPtr<VectorMaker::EvalType<T>> VectorMaker::dictionaryVector(
std::move(values),
TypeKind::INTEGER,
std::move(indices),
getMetadata(stats),
cdvi::EMPTY_METADATA,
indexMap.size(),
nullCount,
stats.isSorted);
Expand Down Expand Up @@ -244,7 +228,7 @@ FlatVectorPtr<VectorMaker::EvalType<T>> VectorMaker::flatVectorNullable(
data.size(),
std::move(dataBuffer),
std::vector<BufferPtr>(),
getMetadata(stats),
cdvi::EMPTY_METADATA,
stats.distinctCount(),
stats.nullCount,
stats.isSorted);
Expand Down Expand Up @@ -275,7 +259,7 @@ FlatVectorPtr<VectorMaker::EvalType<T>> VectorMaker::flatVector(
data.size(),
std::move(dataBuffer),
std::vector<BufferPtr>(),
getMetadata(stats),
cdvi::EMPTY_METADATA,
stats.distinctCount(),
stats.nullCount,
stats.isSorted);
Expand Down
18 changes: 0 additions & 18 deletions velox/vector/tests/VectorMakerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ TEST_F(VectorMakerTest, flatVector) {
EXPECT_EQ(0, flatVector->getNullCount().value());
EXPECT_FALSE(flatVector->isSorted().value());
EXPECT_EQ(9, flatVector->getDistinctValueCount().value());
EXPECT_EQ(-123456, flatVector->getMin().value());
EXPECT_EQ(1024, flatVector->getMax().value());

for (vector_size_t i = 0; i < data.size(); i++) {
EXPECT_EQ(data[i], flatVector->valueAt(i));
Expand All @@ -58,8 +56,6 @@ TEST_F(VectorMakerTest, nullableFlatVector) {
EXPECT_EQ(2, flatVector->getNullCount().value());
EXPECT_FALSE(flatVector->isSorted().value());
EXPECT_EQ(8, flatVector->getDistinctValueCount().value());
EXPECT_EQ(-123456, flatVector->getMin().value());
EXPECT_EQ(1024, flatVector->getMax().value());

for (vector_size_t i = 0; i < data.size(); i++) {
if (data[i] == std::nullopt) {
Expand Down Expand Up @@ -87,8 +83,6 @@ TEST_F(VectorMakerTest, flatVectorString) {
EXPECT_EQ(0, flatVector->getNullCount().value());
EXPECT_FALSE(flatVector->isSorted().value());
EXPECT_EQ(5, flatVector->getDistinctValueCount().value());
EXPECT_EQ(""_sv, flatVector->getMin().value());
EXPECT_EQ(StringView("world"), flatVector->getMax().value());

for (vector_size_t i = 0; i < data.size(); i++) {
EXPECT_EQ(data[i], std::string(flatVector->valueAt(i)));
Expand Down Expand Up @@ -161,8 +155,6 @@ TEST_F(VectorMakerTest, nullableFlatVectorString) {
EXPECT_EQ(1, flatVector->getNullCount().value());
EXPECT_FALSE(flatVector->isSorted().value());
EXPECT_EQ(4, flatVector->getDistinctValueCount().value());
EXPECT_EQ(""_sv, flatVector->getMin().value());
EXPECT_EQ("world"_sv, flatVector->getMax().value());

for (vector_size_t i = 0; i < data.size(); i++) {
if (data[i] == std::nullopt) {
Expand Down Expand Up @@ -190,8 +182,6 @@ TEST_F(VectorMakerTest, nullableFlatVectorBool) {
EXPECT_EQ(1, flatVector->getNullCount().value());
EXPECT_FALSE(flatVector->isSorted().value());
EXPECT_EQ(2, flatVector->getDistinctValueCount().value());
EXPECT_EQ(false, flatVector->getMin().value());
EXPECT_EQ(true, flatVector->getMax().value());

for (vector_size_t i = 0; i < data.size(); i++) {
if (data[i] == std::nullopt) {
Expand Down Expand Up @@ -600,8 +590,6 @@ TEST_F(VectorMakerTest, biasVector) {
EXPECT_TRUE(biasVector->mayHaveNulls());
EXPECT_EQ(1, biasVector->getNullCount().value());
EXPECT_FALSE(biasVector->isSorted().value());
EXPECT_EQ(10, biasVector->getMin().value());
EXPECT_EQ(15, biasVector->getMax().value());

for (vector_size_t i = 0; i < data.size(); i++) {
if (data[i] == std::nullopt) {
Expand All @@ -624,8 +612,6 @@ TEST_F(VectorMakerTest, sequenceVector) {
EXPECT_EQ(3, sequenceVector->getNullCount().value());
EXPECT_FALSE(sequenceVector->isSorted().value());
EXPECT_EQ(4, sequenceVector->getDistinctValueCount().value());
EXPECT_EQ(10, sequenceVector->getMin().value());
EXPECT_EQ(15, sequenceVector->getMax().value());

for (vector_size_t i = 0; i < data.size(); i++) {
if (data[i] == std::nullopt) {
Expand Down Expand Up @@ -657,8 +643,6 @@ TEST_F(VectorMakerTest, sequenceVectorString) {
EXPECT_EQ(3, sequenceVector->getNullCount().value());
EXPECT_FALSE(sequenceVector->isSorted().value());
EXPECT_EQ(3, sequenceVector->getDistinctValueCount().value());
EXPECT_EQ("a"_sv, sequenceVector->getMin().value());
EXPECT_EQ("c"_sv, sequenceVector->getMax().value());

for (vector_size_t i = 0; i < data.size(); i++) {
if (data[i] == std::nullopt) {
Expand Down Expand Up @@ -741,8 +725,6 @@ TEST_F(VectorMakerTest, dictionaryVector) {
EXPECT_EQ(2, dictionaryVector->getNullCount().value());
EXPECT_FALSE(dictionaryVector->isSorted().value());
EXPECT_EQ(3, dictionaryVector->getDistinctValueCount().value());
EXPECT_EQ(10, dictionaryVector->getMin().value());
EXPECT_EQ(99, dictionaryVector->getMax().value());

for (vector_size_t i = 0; i < data.size(); i++) {
if (data[i] == std::nullopt) {
Expand Down
Loading