Skip to content

Commit

Permalink
Replace SimpleVector metadata with strongly typed SimpleVectorStats (f…
Browse files Browse the repository at this point in the history
…acebookincubator#1347)

Summary:
Pull Request resolved: facebookincubator#1347

SimpleVector uses a string-ly typed map to store metadata.  This seems to be used primarily for storing min/max stats as well as the bias for BiasVectors.

This is inefficient because we end up de/serializing back and forth from strings, and doesn't work for all types (e.g. opaque types).  This causes issues in VectorMaker as it tries to initialize stats when creating vectors, so we can't create vectors of Opaque types.

Since SimpleVector is templated on the underlying type, I add a SimpleVectorStats object, also templated on this type, and pass this to the vector's constructor instead of the map of strings.  This way min/max don't need to be serialized/deserialized, and it works for any type.

Reviewed By: laithsakka

Differential Revision: D35363043

fbshipit-source-id: d1ff8966fa10ed0387ef36863f3d2959d8157917
  • Loading branch information
kevinwilfong authored and facebook-github-bot committed Apr 11, 2022
1 parent fbbfb42 commit c4545a4
Show file tree
Hide file tree
Showing 24 changed files with 111 additions and 219 deletions.
2 changes: 1 addition & 1 deletion velox/dwio/dwrf/reader/SelectiveColumnReaderInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ void SelectiveColumnReader::getFlatValues(
true,
type,
T(),
cdvi::EMPTY_METADATA,
SimpleVectorStats<TVector>{},
sizeof(TVector) * rows.size());
return;
}
Expand Down
2 changes: 1 addition & 1 deletion velox/expression/EvalCtx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ void EvalCtx::addError(
AlignedBuffer::allocate<ErrorVector::value_type>(
size, pool(), ErrorVector::value_type()),
std::vector<BufferPtr>(0),
cdvi::EMPTY_METADATA,
SimpleVectorStats<std::shared_ptr<void>>{},
1 /*distinctValueCount*/,
size /*nullCount*/,
false /*isSorted*/,
Expand Down
5 changes: 2 additions & 3 deletions velox/type/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -1555,7 +1555,6 @@ static inline int32_t sizeOfTypeKind(TypeKind kind) {
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(sizeOfTypeKindHelper, kind);
}

// Helper for using folly::to with StringView.
template <typename T, typename U>
static inline T to(U value) {
return folly::to<T>(value);
Expand Down Expand Up @@ -1587,8 +1586,8 @@ inline std::string to(ComplexType value) {
}

template <>
inline ComplexType to(std::string value) {
return ComplexType();
inline velox::StringView to(const std::string& value) {
return velox::StringView(value);
}

namespace exec {
Expand Down
16 changes: 8 additions & 8 deletions velox/vector/BaseVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ static VectorPtr addDictionary(
std::move(vector),
TypeKind::INTEGER,
std::move(indices),
cdvi::EMPTY_METADATA,
SimpleVectorStats<typename KindToFlatVector<kind>::WrapperType>{},
indicesBuffer->size() / sizeof(vector_size_t) /*distinctValueCount*/,
base->getNullCount(),
false /*isSorted*/,
Expand Down Expand Up @@ -144,7 +144,7 @@ addSequence(BufferPtr lengths, vector_size_t size, VectorPtr vector) {
size,
std::move(vector),
std::move(lengths),
cdvi::EMPTY_METADATA,
SimpleVectorStats<typename KindToFlatVector<kind>::WrapperType>{},
std::nullopt /*distinctCount*/,
std::nullopt,
false /*sorted*/,
Expand Down Expand Up @@ -177,7 +177,7 @@ addConstant(vector_size_t size, vector_size_t index, VectorPtr vector) {
auto singleNull = BaseVector::create(vector->type(), 1, pool);
singleNull->setNull(0, true);
return std::make_shared<ConstantVector<T>>(
pool, size, 0, singleNull, cdvi::EMPTY_METADATA);
pool, size, 0, singleNull, SimpleVectorStats<T>{});
} else {
return std::make_shared<ConstantVector<T>>(
pool, size, true, vector->type(), T());
Expand Down Expand Up @@ -207,7 +207,7 @@ addConstant(vector_size_t size, vector_size_t index, VectorPtr vector) {
}

return std::make_shared<ConstantVector<T>>(
pool, size, index, std::move(vector), cdvi::EMPTY_METADATA);
pool, size, index, std::move(vector), SimpleVectorStats<T>{});
}

// static
Expand All @@ -234,7 +234,7 @@ static VectorPtr createEmpty(
size,
std::move(values),
std::vector<BufferPtr>(),
cdvi::EMPTY_METADATA,
SimpleVectorStats<T>{},
0 /*distinctValueCount*/,
0 /*nullCount*/,
false /*isSorted*/,
Expand Down Expand Up @@ -332,7 +332,7 @@ VectorPtr BaseVector::create(
size,
BufferPtr(nullptr),
std::vector<BufferPtr>(),
cdvi::EMPTY_METADATA,
SimpleVectorStats<UnknownValue>{},
1 /*distinctValueCount*/,
size /*nullCount*/,
true /*isSorted*/,
Expand Down Expand Up @@ -539,7 +539,7 @@ VectorPtr newConstant(
value.isNull(),
Type::create<kind>(),
std::move(copy),
cdvi::EMPTY_METADATA,
SimpleVectorStats<T>{},
sizeof(T) /*representedByteCount*/);
}

Expand All @@ -557,7 +557,7 @@ VectorPtr newConstant<TypeKind::OPAQUE>(
value.isNull(),
capsule.type,
std::shared_ptr<void>(capsule.obj),
cdvi::EMPTY_METADATA,
SimpleVectorStats<std::shared_ptr<void>>{},
sizeof(std::shared_ptr<void>) /*representedByteCount*/);
}

Expand Down
4 changes: 0 additions & 4 deletions velox/vector/BaseVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,6 @@
namespace facebook {
namespace velox {

namespace cdvi {
const folly::F14FastMap<std::string, std::string> EMPTY_METADATA;
} // namespace cdvi

template <typename T>
class SimpleVector;

Expand Down
16 changes: 6 additions & 10 deletions velox/vector/BiasVector-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ static inline __m256i simdAdd(const __m256i& left, const __m256i& right) {
* 2 buffers.
*
* Buffer order: [nullData, valueData]
*
* The bias value is stored in the metaData block via the BIAS_VALUE key.
*/
template <typename T>
BiasVector<T>::BiasVector(
Expand All @@ -56,7 +54,8 @@ BiasVector<T>::BiasVector(
size_t length,
TypeKind valueType,
BufferPtr values,
const folly::F14FastMap<std::string, std::string>& metaData,
T bias,
const SimpleVectorStats<T>& stats,
std::optional<vector_size_t> distinctCount,
std::optional<vector_size_t> nullCount,
std::optional<bool> sorted,
Expand All @@ -66,23 +65,20 @@ BiasVector<T>::BiasVector(
pool,
nulls,
length,
metaData,
stats,
distinctCount,
nullCount,
sorted,
representedBytes,
storageByteCount),
valueType_(valueType),
values_(std::move(values)) {
values_(std::move(values)),
bias_(bias) {
VELOX_CHECK(
valueType_ == TypeKind::INTEGER || valueType_ == TypeKind::SMALLINT ||
valueType_ == TypeKind::TINYINT,
"Invalid array type for biased values");

auto bias =
SimpleVector<T>::template getMetaDataValue<T>(metaData, BIAS_VALUE);
VELOX_CHECK(bias.has_value(), "Bias value is required");
bias_ = bias.value();
biasBuffer_ = simd::setAll256i(bias_);

switch (valueType_) {
Expand Down Expand Up @@ -122,7 +118,7 @@ std::unique_ptr<SimpleVector<uint64_t>> BiasVector<T>::hashAll() const {
BaseVector::length_,
hashes,
std::vector<BufferPtr>(0) /*stringBuffers*/,
cdvi::EMPTY_METADATA,
SimpleVectorStats<uint64_t>{},
std::nullopt /*distinctValueCount*/,
0 /* nullCount */,
false /*isSorted*/,
Expand Down
8 changes: 2 additions & 6 deletions velox/vector/BiasVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ class BiasVector : public SimpleVector<T> {
// bias");

public:
static const std::string BIAS_VALUE;
static constexpr bool can_simd =
(std::is_same<T, int64_t>::value || std::is_same<T, int32_t>::value ||
std::is_same<T, int16_t>::value);
Expand All @@ -103,8 +102,8 @@ class BiasVector : public SimpleVector<T> {
size_t length,
TypeKind valueType,
BufferPtr values,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA,
T bias,
const SimpleVectorStats<T>& stats = {},
std::optional<int32_t> distinctCount = std::nullopt,
std::optional<vector_size_t> nullCount = std::nullopt,
std::optional<bool> sorted = std::nullopt,
Expand Down Expand Up @@ -256,9 +255,6 @@ class BiasVector : public SimpleVector<T> {
__m256i biasBuffer_;
};

template <typename T>
const std::string BiasVector<T>::BIAS_VALUE = "CDV1";

template <typename T>
using BiasVectorPtr = std::shared_ptr<BiasVector<T>>;

Expand Down
1 change: 0 additions & 1 deletion velox/vector/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ add_library(
LazyVector.cpp
SelectivityVector.cpp
SequenceVector.cpp
SimpleVector.cpp
VectorEncoding.cpp
VectorStream.cpp)

Expand Down
2 changes: 1 addition & 1 deletion velox/vector/ConstantVector-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ std::unique_ptr<SimpleVector<uint64_t>> ConstantVector<T>::hashAll() const {
BaseVector::length_,
false /* isNull */,
this->hashValueAt(0),
folly::F14FastMap<std::string, std::string>(),
SimpleVectorStats<uint64_t>{}, /* stats */
sizeof(uint64_t) * BaseVector::length_ /* representedBytes */);
}

Expand Down
6 changes: 6 additions & 0 deletions velox/vector/ConstantVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,11 @@ void ConstantVector<std::shared_ptr<void>>::setValue(
VELOX_NYI();
}

template <>
void ConstantVector<ComplexType>::setValue(const std::string& /*string*/) {
VELOX_UNSUPPORTED(
"ConstantVectors of ComplexType cannot be initialized from string values.");
}

} // namespace velox
} // namespace facebook
18 changes: 9 additions & 9 deletions velox/vector/ConstantVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ class ConstantVector final : public SimpleVector<T> {
size_t length,
bool isNull,
T&& val,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA,
const SimpleVectorStats<T>& stats = {},
std::optional<ByteCount> representedBytes = std::nullopt,
std::optional<ByteCount> storageByteCount = std::nullopt)
: ConstantVector(
Expand All @@ -55,7 +54,7 @@ class ConstantVector final : public SimpleVector<T> {
isNull,
CppToType<T>::create(),
std::move(val),
metaData,
stats,
representedBytes,
storageByteCount) {}

Expand All @@ -65,16 +64,15 @@ class ConstantVector final : public SimpleVector<T> {
bool isNull,
std::shared_ptr<const Type> type,
T&& val,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA,
const SimpleVectorStats<T>& stats = {},
std::optional<ByteCount> representedBytes = std::nullopt,
std::optional<ByteCount> storageByteCount = std::nullopt)
: SimpleVector<T>(
pool,
type,
BufferPtr(nullptr),
length,
metaData,
stats,
isNull ? 0 : 1 /* distinctValueCount */,
isNull ? length : 0 /* nullCount */,
true /*isSorted*/,
Expand Down Expand Up @@ -114,14 +112,13 @@ class ConstantVector final : public SimpleVector<T> {
vector_size_t length,
vector_size_t index,
VectorPtr base,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA)
const SimpleVectorStats<T>& stats = {})
: SimpleVector<T>(
pool,
base->type(),
BufferPtr(nullptr),
length,
metaData,
stats,
std::nullopt,
std::nullopt,
true /*isSorted*/,
Expand Down Expand Up @@ -413,6 +410,9 @@ void ConstantVector<StringView>::setValue(const std::string& string);
template <>
void ConstantVector<std::shared_ptr<void>>::setValue(const std::string& string);

template <>
void ConstantVector<ComplexType>::setValue(const std::string& string);

template <typename T>
using ConstantVectorPtr = std::shared_ptr<ConstantVector<T>>;

Expand Down
8 changes: 4 additions & 4 deletions velox/vector/DictionaryVector-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ DictionaryVector<T>::DictionaryVector(
std::shared_ptr<BaseVector> dictionaryValues,
TypeKind indexType,
BufferPtr dictionaryIndices,
const folly::F14FastMap<std::string, std::string>& metaData,
const SimpleVectorStats<T>& stats,
std::optional<vector_size_t> distinctValueCount,
std::optional<vector_size_t> nullCount,
std::optional<bool> isSorted,
Expand All @@ -66,13 +66,13 @@ DictionaryVector<T>::DictionaryVector(
dictionaryValues->type(),
nulls,
length,
metaData,
stats,
distinctValueCount,
nullCount,
isSorted,
representedBytes,
storageByteCount),
dictionaryMetaData_(metaData) {
dictionaryStats_(stats) {
VELOX_CHECK(dictionaryValues != nullptr, "dictionaryValues must not be null");
VELOX_CHECK(
dictionaryIndices != nullptr, "dictionaryIndices must not be null");
Expand Down Expand Up @@ -139,7 +139,7 @@ std::unique_ptr<SimpleVector<uint64_t>> DictionaryVector<T>::hashAll() const {
BaseVector::length_,
hashes,
std::vector<BufferPtr>(0) /* stringBuffers */,
folly::F14FastMap<std::string, std::string>(),
SimpleVectorStats<uint64_t>{},
BaseVector::distinctValueCount_.value() +
(BaseVector::nullCount_.value_or(0) > 0 ? 1 : 0),
0 /* nullCount */,
Expand Down
14 changes: 6 additions & 8 deletions velox/vector/DictionaryVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ class DictionaryVector : public SimpleVector<T> {
VectorPtr dictionaryValues,
TypeKind indexTypeKind,
BufferPtr dictionaryIndexArray,
const folly::F14FastMap<std::string, std::string>& metaData =
cdvi::EMPTY_METADATA,
const SimpleVectorStats<T>& stats = {},
std::optional<vector_size_t> distinctValueCount = std::nullopt,
std::optional<vector_size_t> nullCount = std::nullopt,
std::optional<bool> isSorted = std::nullopt,
Expand Down Expand Up @@ -114,13 +113,12 @@ class DictionaryVector : public SimpleVector<T> {
}

/**
* @return metadata for the internal dictionary value vector. They
* @return stats for the internal dictionary value vector. They
* hold the min and max value in the dictionary.
*/
// TODO (T61713241): Remove this later.
inline const folly::F14FastMap<std::string, std::string>&
getDictionaryMetaData() const {
return dictionaryMetaData_;
inline const SimpleVectorStats<T>& getDictionaryStats() const {
return dictionaryStats_;
}

inline const BufferPtr& indices() const {
Expand Down Expand Up @@ -238,8 +236,8 @@ class DictionaryVector : public SimpleVector<T> {

void setInternalState();

// metadata over the contained vector data
folly::F14FastMap<std::string, std::string> dictionaryMetaData_;
// stats over the contained vector data
SimpleVectorStats<T> dictionaryStats_;

// the dictionary indices of the vector can be variable types depending on the
// size of the dictionary - kept as original and typed
Expand Down
2 changes: 1 addition & 1 deletion velox/vector/FlatVector-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ std::unique_ptr<SimpleVector<uint64_t>> FlatVector<T>::hashAll() const {
BaseVector::length_,
std::move(hashBuffer),
std::vector<BufferPtr>() /*stringBuffers*/,
folly::F14FastMap<std::string, std::string>(),
SimpleVectorStats<uint64_t>{}, /* stats */
std::nullopt /*distinctValueCount*/,
0 /*nullCount*/,
false /*sorted*/,
Expand Down
Loading

0 comments on commit c4545a4

Please sign in to comment.