Skip to content

Commit

Permalink
Replace SimpleVector metadata with strongly typed SimpleVectorStats (f…
Browse files Browse the repository at this point in the history
…acebookincubator#1347)

Summary:
Pull Request resolved: facebookincubator#1347

SimpleVector uses a string-ly typed map to store metadata.  This seems to be used primarily for storing min/max stats as well as the bias for BiasVectors.

This is inefficient because we end up de/serializing back and forth from strings, and doesn't work for all types (e.g. opaque types).  This causes issues in VectorMaker as it tries to initialize stats when creating vectors, so we can't create vectors of Opaque types.

Since SimpleVector is templated on the underlying type, I add a SimpleVectorStats object, also templated on this type, and pass this to the vector's constructor instead of the map of strings.  This way min/max don't need to be serialized/deserialized, and it works for any type.

Reviewed By: laithsakka

Differential Revision: D35363043

fbshipit-source-id: 674cb6d8c2ac4d47a94c9ac9a2138fae4d618c86
  • Loading branch information
kevinwilfong authored and artem.malyshev committed May 31, 2022
1 parent c364d69 commit f18b40d
Show file tree
Hide file tree
Showing 24 changed files with 117 additions and 225 deletions.
2 changes: 1 addition & 1 deletion velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ void SelectiveColumnReader::getFlatValues(
true,
type,
T(),
cdvi::EMPTY_METADATA,
SimpleVectorStats<TVector>{},
sizeof(TVector) * rows.size());
return;
}
Expand Down
2 changes: 1 addition & 1 deletion velox/expression/EvalCtx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ void EvalCtx::addError(
AlignedBuffer::allocate<ErrorVector::value_type>(
size, pool(), ErrorVector::value_type()),
std::vector<BufferPtr>(0),
cdvi::EMPTY_METADATA,
SimpleVectorStats<std::shared_ptr<void>>{},
1 /*distinctValueCount*/,
size /*nullCount*/,
false /*isSorted*/,
Expand Down
17 changes: 8 additions & 9 deletions velox/type/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -1564,40 +1564,39 @@ static inline int32_t sizeOfTypeKind(TypeKind kind) {
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(sizeOfTypeKindHelper, kind);
}

// Helper for using folly::to with StringView.
template <typename T, typename U>
static inline T to(U value) {
static inline T to(const U& value) {
return folly::to<T>(value);
}

template <>
inline Timestamp to(std::string value) {
inline Timestamp to(const std::string& value) {
return Timestamp(0, 0);
}

template <>
inline UnknownValue to(std::string /* value */) {
inline UnknownValue to(const std::string& /* value */) {
return UnknownValue();
}

template <>
inline std::string to(Timestamp value) {
inline std::string to(const Timestamp& value) {
return value.toString();
}

template <>
inline std::string to(velox::StringView value) {
inline std::string to(const velox::StringView& value) {
return std::string(value.data(), value.size());
}

template <>
inline std::string to(ComplexType value) {
inline std::string to(const ComplexType& value) {
return std::string("ComplexType");
}

template <>
inline ComplexType to(std::string value) {
return ComplexType();
inline velox::StringView to(const std::string& value) {
return velox::StringView(value);
}

namespace exec {
Expand Down
16 changes: 8 additions & 8 deletions velox/vector/BaseVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ static VectorPtr addDictionary(
std::move(vector),
TypeKind::INTEGER,
std::move(indices),
cdvi::EMPTY_METADATA,
SimpleVectorStats<typename KindToFlatVector<kind>::WrapperType>{},
indicesBuffer->size() / sizeof(vector_size_t) /*distinctValueCount*/,
base->getNullCount(),
false /*isSorted*/,
Expand Down Expand Up @@ -144,7 +144,7 @@ addSequence(BufferPtr lengths, vector_size_t size, VectorPtr vector) {
size,
std::move(vector),
std::move(lengths),
cdvi::EMPTY_METADATA,
SimpleVectorStats<typename KindToFlatVector<kind>::WrapperType>{},
std::nullopt /*distinctCount*/,
std::nullopt,
false /*sorted*/,
Expand Down Expand Up @@ -177,7 +177,7 @@ addConstant(vector_size_t size, vector_size_t index, VectorPtr vector) {
auto singleNull = BaseVector::create(vector->type(), 1, pool);
singleNull->setNull(0, true);
return std::make_shared<ConstantVector<T>>(
pool, size, 0, singleNull, cdvi::EMPTY_METADATA);
pool, size, 0, singleNull, SimpleVectorStats<T>{});
} else {
return std::make_shared<ConstantVector<T>>(
pool, size, true, vector->type(), T());
Expand Down Expand Up @@ -207,7 +207,7 @@ addConstant(vector_size_t size, vector_size_t index, VectorPtr vector) {
}

return std::make_shared<ConstantVector<T>>(
pool, size, index, std::move(vector), cdvi::EMPTY_METADATA);
pool, size, index, std::move(vector), SimpleVectorStats<T>{});
}

// static
Expand All @@ -234,7 +234,7 @@ static VectorPtr createEmpty(
size,
std::move(values),
std::vector<BufferPtr>(),
cdvi::EMPTY_METADATA,
SimpleVectorStats<T>{},
0 /*distinctValueCount*/,
0 /*nullCount*/,
false /*isSorted*/,
Expand Down Expand Up @@ -332,7 +332,7 @@ VectorPtr BaseVector::create(
size,
BufferPtr(nullptr),
std::vector<BufferPtr>(),
cdvi::EMPTY_METADATA,
SimpleVectorStats<UnknownValue>{},
1 /*distinctValueCount*/,
size /*nullCount*/,
true /*isSorted*/,
Expand Down Expand Up @@ -539,7 +539,7 @@ VectorPtr newConstant(
value.isNull(),
Type::create<kind>(),
std::move(copy),
cdvi::EMPTY_METADATA,
SimpleVectorStats<T>{},
sizeof(T) /*representedByteCount*/);
}

Expand All @@ -557,7 +557,7 @@ VectorPtr newConstant<TypeKind::OPAQUE>(
value.isNull(),
capsule.type,
std::shared_ptr<void>(capsule.obj),
cdvi::EMPTY_METADATA,
SimpleVectorStats<std::shared_ptr<void>>{},
sizeof(std::shared_ptr<void>) /*representedByteCount*/);
}

Expand Down
4 changes: 0 additions & 4 deletions velox/vector/BaseVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,6 @@
namespace facebook {
namespace velox {

namespace cdvi {
const folly::F14FastMap<std::string, std::string> EMPTY_METADATA;
} // namespace cdvi

template <typename T>
class SimpleVector;

Expand Down
16 changes: 6 additions & 10 deletions velox/vector/BiasVector-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ static inline __m256i simdAdd(const __m256i& left, const __m256i& right) {
* 2 buffers.
*
* Buffer order: [nullData, valueData]
*
* The bias value is stored in the metaData block via the BIAS_VALUE key.
*/
template <typename T>
BiasVector<T>::BiasVector(
Expand All @@ -56,7 +54,8 @@ BiasVector<T>::BiasVector(
size_t length,
TypeKind valueType,
BufferPtr values,
const folly::F14FastMap<std::string, std::string>& metaData,
T bias,
const SimpleVectorStats<T>& stats,
std::optional<vector_size_t> distinctCount,
std::optional<vector_size_t> nullCount,
std::optional<bool> sorted,
Expand All @@ -66,23 +65,20 @@ BiasVector<T>::BiasVector(
pool,
nulls,
length,
metaData,
stats,
distinctCount,
nullCount,
sorted,
representedBytes,
storageByteCount),
valueType_(valueType),
values_(std::move(values)) {
values_(std::move(values)),
bias_(bias) {
VELOX_CHECK(
valueType_ == TypeKind::INTEGER || valueType_ == TypeKind::SMALLINT ||
valueType_ == TypeKind::TINYINT,
"Invalid array type for biased values");

auto bias =
SimpleVector<T>::template getMetaDataValue<T>(metaData, BIAS_VALUE);
VELOX_CHECK(bias.has_value(), "Bias value is required");
bias_ = bias.value();
biasBuffer_ = simd::setAll256i(bias_);

switch (valueType_) {
Expand Down Expand Up @@ -122,7 +118,7 @@ std::unique_ptr<SimpleVector<uint64_t>> BiasVector<T>::hashAll() const {
BaseVector::length_,
hashes,
std::vector<BufferPtr>(0) /*stringBuffers*/,
cdvi::EMPTY_METADATA,
SimpleVectorStats<uint64_t>{},
std::nullopt /*distinctValueCount*/,
0 /* nullCount */,
false /*isSorted*/,
Expand Down
8 changes: 2 additions & 6 deletions velox/vector/BiasVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ class BiasVector : public SimpleVector<T> {
// bias");

public:
static const std::string BIAS_VALUE;
static constexpr bool can_simd =
(std::is_same<T, int64_t>::value || std::is_same<T, int32_t>::value ||
std::is_same<T, int16_t>::value);
Expand All @@ -103,8 +102,8 @@ class BiasVector : public SimpleVector<T> {
size_t length,
TypeKind valueType,
BufferPtr values,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA,
T bias,
const SimpleVectorStats<T>& stats = {},
std::optional<int32_t> distinctCount = std::nullopt,
std::optional<vector_size_t> nullCount = std::nullopt,
std::optional<bool> sorted = std::nullopt,
Expand Down Expand Up @@ -256,9 +255,6 @@ class BiasVector : public SimpleVector<T> {
__m256i biasBuffer_;
};

template <typename T>
const std::string BiasVector<T>::BIAS_VALUE = "CDV1";

template <typename T>
using BiasVectorPtr = std::shared_ptr<BiasVector<T>>;

Expand Down
1 change: 0 additions & 1 deletion velox/vector/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ add_library(
LazyVector.cpp
SelectivityVector.cpp
SequenceVector.cpp
SimpleVector.cpp
VectorEncoding.cpp
VectorStream.cpp)

Expand Down
2 changes: 1 addition & 1 deletion velox/vector/ConstantVector-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ std::unique_ptr<SimpleVector<uint64_t>> ConstantVector<T>::hashAll() const {
BaseVector::length_,
false /* isNull */,
this->hashValueAt(0),
folly::F14FastMap<std::string, std::string>(),
SimpleVectorStats<uint64_t>{}, /* stats */
sizeof(uint64_t) * BaseVector::length_ /* representedBytes */);
}

Expand Down
6 changes: 6 additions & 0 deletions velox/vector/ConstantVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,11 @@ void ConstantVector<std::shared_ptr<void>>::setValue(
VELOX_NYI();
}

template <>
void ConstantVector<ComplexType>::setValue(const std::string& /*string*/) {
VELOX_UNSUPPORTED(
"ConstantVectors of ComplexType cannot be initialized from string values.");
}

} // namespace velox
} // namespace facebook
18 changes: 9 additions & 9 deletions velox/vector/ConstantVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ class ConstantVector final : public SimpleVector<T> {
size_t length,
bool isNull,
T&& val,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA,
const SimpleVectorStats<T>& stats = {},
std::optional<ByteCount> representedBytes = std::nullopt,
std::optional<ByteCount> storageByteCount = std::nullopt)
: ConstantVector(
Expand All @@ -55,7 +54,7 @@ class ConstantVector final : public SimpleVector<T> {
isNull,
CppToType<T>::create(),
std::move(val),
metaData,
stats,
representedBytes,
storageByteCount) {}

Expand All @@ -65,16 +64,15 @@ class ConstantVector final : public SimpleVector<T> {
bool isNull,
std::shared_ptr<const Type> type,
T&& val,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA,
const SimpleVectorStats<T>& stats = {},
std::optional<ByteCount> representedBytes = std::nullopt,
std::optional<ByteCount> storageByteCount = std::nullopt)
: SimpleVector<T>(
pool,
type,
BufferPtr(nullptr),
length,
metaData,
stats,
isNull ? 0 : 1 /* distinctValueCount */,
isNull ? length : 0 /* nullCount */,
true /*isSorted*/,
Expand Down Expand Up @@ -114,14 +112,13 @@ class ConstantVector final : public SimpleVector<T> {
vector_size_t length,
vector_size_t index,
VectorPtr base,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA)
const SimpleVectorStats<T>& stats = {})
: SimpleVector<T>(
pool,
base->type(),
BufferPtr(nullptr),
length,
metaData,
stats,
std::nullopt,
std::nullopt,
true /*isSorted*/,
Expand Down Expand Up @@ -413,6 +410,9 @@ void ConstantVector<StringView>::setValue(const std::string& string);
template <>
void ConstantVector<std::shared_ptr<void>>::setValue(const std::string& string);

template <>
void ConstantVector<ComplexType>::setValue(const std::string& string);

template <typename T>
using ConstantVectorPtr = std::shared_ptr<ConstantVector<T>>;

Expand Down
8 changes: 4 additions & 4 deletions velox/vector/DictionaryVector-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ DictionaryVector<T>::DictionaryVector(
std::shared_ptr<BaseVector> dictionaryValues,
TypeKind indexType,
BufferPtr dictionaryIndices,
const folly::F14FastMap<std::string, std::string>& metaData,
const SimpleVectorStats<T>& stats,
std::optional<vector_size_t> distinctValueCount,
std::optional<vector_size_t> nullCount,
std::optional<bool> isSorted,
Expand All @@ -66,13 +66,13 @@ DictionaryVector<T>::DictionaryVector(
dictionaryValues->type(),
nulls,
length,
metaData,
stats,
distinctValueCount,
nullCount,
isSorted,
representedBytes,
storageByteCount),
dictionaryMetaData_(metaData) {
dictionaryStats_(stats) {
VELOX_CHECK(dictionaryValues != nullptr, "dictionaryValues must not be null");
VELOX_CHECK(
dictionaryIndices != nullptr, "dictionaryIndices must not be null");
Expand Down Expand Up @@ -139,7 +139,7 @@ std::unique_ptr<SimpleVector<uint64_t>> DictionaryVector<T>::hashAll() const {
BaseVector::length_,
hashes,
std::vector<BufferPtr>(0) /* stringBuffers */,
folly::F14FastMap<std::string, std::string>(),
SimpleVectorStats<uint64_t>{},
BaseVector::distinctValueCount_.value() +
(BaseVector::nullCount_.value_or(0) > 0 ? 1 : 0),
0 /* nullCount */,
Expand Down
14 changes: 6 additions & 8 deletions velox/vector/DictionaryVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ class DictionaryVector : public SimpleVector<T> {
VectorPtr dictionaryValues,
TypeKind indexTypeKind,
BufferPtr dictionaryIndexArray,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA,
const SimpleVectorStats<T>& stats = {},
std::optional<vector_size_t> distinctValueCount = std::nullopt,
std::optional<vector_size_t> nullCount = std::nullopt,
std::optional<bool> isSorted = std::nullopt,
Expand Down Expand Up @@ -114,13 +113,12 @@ class DictionaryVector : public SimpleVector<T> {
}

/**
* @return metadata for the internal dictionary value vector. They
* @return stats for the internal dictionary value vector. They
* hold the min and max value in the dictionary.
*/
// TODO (T61713241): Remove this later.
inline const folly::F14FastMap<std::string, std::string>&
getDictionaryMetaData() const {
return dictionaryMetaData_;
inline const SimpleVectorStats<T>& getDictionaryStats() const {
return dictionaryStats_;
}

inline const BufferPtr& indices() const {
Expand Down Expand Up @@ -238,8 +236,8 @@ class DictionaryVector : public SimpleVector<T> {

void setInternalState();

// metadata over the contained vector data
folly::F14FastMap<std::string, std::string> dictionaryMetaData_;
// stats over the contained vector data
SimpleVectorStats<T> dictionaryStats_;

// the dictionary indices of the vector can be variable types depending on the
// size of the dictionary - kept as original and typed
Expand Down
Loading

0 comments on commit f18b40d

Please sign in to comment.