Skip to content

Commit

Permalink
Cast support to generic (#1417)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #1417

- This diff adds the support of type access and cast to generics.
- It introduces 4 functions that can be called on the GenericView:
   - type()
   - kind()
   -  castTo<T>: performs an unchecked cast and returns arg_type<T>. A type-safety check
      happens in debug builds only.
   -  tryCastTo<T>: returns std::optional<arg_type<T>>. It first checks the type and returns
      std::nullopt if T does not match the type of the vector; otherwise it performs the cast.

- **Cost**:
   - The first time we do the cast we create the readers corresponding to that type. Then, for the
following rows, the cost is a couple of instructions: checking that the reader is created, accessing the reader
in the variant, and returning the element at the row index. In some uncommon cases there is an additional
check that the type being cast to is consistent across rows.
  - tryCastTo is more expensive, since it does a type check as well.
  - In general it's not expensive to use it with complex types, but avoid using it with primitives by either
implementing a function specialized for primitive input (see the == function as an example), or casting to
complex types already specialized with primitive types, i.e. Array<int> or Array<double> instead of Array<Any>
 and then casting Any to int for every element.

- **What can be casted to?**
This diff enables casting to all basic types plus Array<Any>, Map<Any, Any>, Row<Any>, Row<Any, Any> and
Row<Any, Any, ...> with up to 5 fields. This allows complex types to be traversed recursively.

- **How to add a new casted to type?**
  - Any type except Generic<> itself (cast to self type) or Variadic<...>.

- The diff adds example function HasDuplicate, which checks if an array has duplicate items.

Differential Revision: D35616634

fbshipit-source-id: 203170e4b680032b0616cc6aee89305dbc57a61f
  • Loading branch information
laithsakka authored and facebook-github-bot committed Apr 16, 2022
1 parent 3ed1558 commit dd36fe8
Show file tree
Hide file tree
Showing 4 changed files with 744 additions and 19 deletions.
225 changes: 219 additions & 6 deletions velox/expression/ComplexViewTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#pragma once
#include <fmt/format.h>
#include <iterator>
#include <optional>

Expand Down Expand Up @@ -921,27 +922,239 @@ inline auto get(const RowView<returnsOptionalValues, Types...>& row) {
return row.template at<I>();
}

// Shorthand for a raw pointer to the VectorReader specialization for type T.
template <typename T>
using reader_ptr_t = VectorReader<T>*;
// A set of structs used to perform analysis on a static type to
// collect information needed for signature construction.
// Compile-time trait: HasGeneric<T>::value() tells whether the static type T
// mentions Generic<...> anywhere inside it. This primary template is the
// fallback for every type without a dedicated specialization and reports
// false.
template <typename T>
struct HasGeneric {
  static constexpr bool value() {
    // No Generic can be nested in a type we do not know how to look into.
    return false;
  }
};

// Generic<T> itself: the type is a generic, so the trait reports true.
template <typename T>
struct HasGeneric<Generic<T>> {
  static constexpr bool value() {
    return true;
  }
};

// Map<K, V>: contains a generic when either the key type or the value type
// does.
template <typename K, typename V>
struct HasGeneric<Map<K, V>> {
  static constexpr bool value() {
    constexpr bool keyHasGeneric = HasGeneric<K>::value();
    constexpr bool valueHasGeneric = HasGeneric<V>::value();
    return keyHasGeneric || valueHasGeneric;
  }
};

// Array<V>: contains a generic exactly when its element type does.
template <typename V>
struct HasGeneric<Array<V>> {
  static constexpr bool value() {
    constexpr bool elementHasGeneric = HasGeneric<V>::value();
    return elementHasGeneric;
  }
};

// Row<Fields...>: contains a generic when at least one of its field types
// does. An empty field list folds to false.
template <typename... Fields>
struct HasGeneric<Row<Fields...>> {
  static constexpr bool value() {
    return (... || HasGeneric<Fields>::value());
  }
};

// Compile-time trait: AllGenericExceptTop<T>::value() is true for a complex
// type whose direct children are all generic, i.e. Array<Any>, Map<Any, Any>,
// Row<Any, ...>. This primary template covers every other type and reports
// false.
template <typename T>
struct AllGenericExceptTop {
  static constexpr bool value() {
    // Types without a dedicated specialization never qualify.
    return false;
  }
};

// Array<V>: qualifies when the element type V is itself a generic type.
template <typename V>
struct AllGenericExceptTop<Array<V>> {
  static constexpr bool value() {
    constexpr bool elementIsGeneric = isGenericType<V>::value;
    return elementIsGeneric;
  }
};

// Map<K, V>: qualifies only when both the key type and the value type are
// generic types.
template <typename K, typename V>
struct AllGenericExceptTop<Map<K, V>> {
  static constexpr bool value() {
    constexpr bool keyIsGeneric = isGenericType<K>::value;
    constexpr bool valueIsGeneric = isGenericType<V>::value;
    return keyIsGeneric && valueIsGeneric;
  }
};

// Row<Fields...>: qualifies only when every field type is a generic type. An
// empty field list folds to true.
template <typename... Fields>
struct AllGenericExceptTop<Row<Fields...>> {
  static constexpr bool value() {
    return (... && isGenericType<Fields>::value);
  }
};

class GenericView {
public:
GenericView(const BaseVector* vector, vector_size_t index)
: vector_(vector), index_(index) {}
friend class GenericViewTest;

GenericView(
const DecodedVector& decoded,
std::array<std::shared_ptr<void>, 3>& castReaders,
TypePtr& castType,
vector_size_t index)
: decoded_(decoded),
castReaders_(castReaders),
castType_(castType),
index_(index) {}

uint64_t hash() const {
return vector_->hashValueAt(index_);
return decoded_.base()->hashValueAt(index_);
}

bool operator==(const GenericView& other) const {
return vector_->equalValueAt(other.vector_, index_, other.index_);
return decoded_.base()->equalValueAt(
other.decoded_.base(), index_, other.index_);
}

std::optional<int64_t> compare(
const GenericView& other,
const CompareFlags flags) const {
return vector_->compare(other.vector_, index_, other.index_, flags);
return decoded_.base()->compare(
other.decoded_.base(), index_, other.index_, flags);
}

TypeKind kind() const {
return decoded_.base()->typeKind();
}

const TypePtr type() const {
return decoded_.base()->type();
}

// If conversion is invalid, behavior is undefined. However, debug time
// checks will throw an exception.
template <typename ToType>
typename VectorReader<ToType>::exec_in_t castTo() const {
VELOX_DCHECK(
CastTypeChecker<ToType>::check(type()),
fmt::format(
"castTo type is not compatible with type of vector, vector type is {}, casted to type is {}",
type()->toString(),
CppToType<ToType>::create()->toString()));

// TODO: We can distinguish if this is a null-free or not null-free
// generic. And based on that determine if we want to call operator[] or
// readNullFree. For now we always return nullable.
return ensureReader<ToType>()->operator[](index_);
}

template <typename ToType>
std::optional<typename VectorReader<ToType>::exec_in_t> tryCastTo() const {
if (!CastTypeChecker<ToType>::check(type())) {
return std::nullopt;
}

return ensureReader<ToType>()->operator[](index_);
}

private:
const BaseVector* vector_;
void CastToTypeNotSupported(const TypePtr& type) const {
VELOX_USER_CHECK(
false,
fmt::format(
"castTo type is not supported: {}. Consider adding the type to readers_variant_t",
type->toString()));
}

// Utility class that checks that vectorType matches T.
template <typename T>
struct CastTypeChecker {
static bool check(const TypePtr& vectorType) {
return CppToType<T>::typeKind == vectorType->kind();
}
};

template <typename T>
struct CastTypeChecker<Generic<T>> {
static bool check(const TypePtr&) {
return true;
}
};

template <typename T>
struct CastTypeChecker<Array<T>> {
static bool check(const TypePtr& vectorType) {
return TypeKind::ARRAY == vectorType->kind() &&
CastTypeChecker<T>::check(vectorType->childAt(0));
}
};

template <typename K, typename V>
struct CastTypeChecker<Map<K, V>> {
static bool check(const TypePtr& vectorType) {
return TypeKind::MAP == vectorType->kind() &&
CastTypeChecker<K>::check(vectorType->childAt(0)) &&
CastTypeChecker<V>::check(vectorType->childAt(1));
}
};

template <typename... T>
struct CastTypeChecker<Row<T...>> {
static bool check(const TypePtr& vectorType) {
int index = 0;
return TypeKind::ROW == vectorType->kind() &&
(CastTypeChecker<T>::check(vectorType->childAt(index++)) && ... &&
true);
}
};

template <typename B>
VectorReader<B>* ensureReader() const {
static_assert(
!isGenericType<B>::value && !isVariadicType<B>::value,
"That does not make any sense! You cant cast to Generic or Variadic");

// This is an optimization to avoid checking dynamically for every row that
// the user is always casting to the same type.
// Types are divided into three sets, for 1, and 2 we do not do the check,
// since no two types can ever refer to the same vector.

if constexpr (!HasGeneric<B>::value()) {
// Two types with no generic can never represent same vector.
return ensureReaderImpl<B, 0>();
} else {
if constexpr (AllGenericExceptTop<B>::value()) {
// This is basically Array<Any>, Map<Any,Any>, Row<Any....>.
return ensureReaderImpl<B, 1>();
} else {
auto requestedType = CppToType<B>::create();
if (castType_) {
VELOX_USER_CHECK(
castType_->operator==(*requestedType),
fmt::format(
"Not allowed to cast to the two types {} and {} within the same batch."
"Consider creating a new type set to allow it.",
castType_->toString(),
requestedType->toString()));
} else {
castType_ = std::move(requestedType);
}
return ensureReaderImpl<B, 2>();
}
}
}

template <typename B, size_t I>
VectorReader<B>* ensureReaderImpl() const {
auto* reader = static_cast<VectorReader<B>*>(castReaders_[I].get());
if (LIKELY(reader != nullptr)) {
return reader;
} else {
castReaders_[I] = std::make_shared<VectorReader<B>>(&decoded_);
return static_cast<VectorReader<B>*>(castReaders_[I].get());
}
}

const DecodedVector& decoded_;
std::array<std::shared_ptr<void>, 3>& castReaders_;
TypePtr& castType_;
vector_size_t index_;
};

Expand Down
15 changes: 8 additions & 7 deletions velox/expression/VectorUdfTypeSystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <velox/vector/BaseVector.h>
#include <velox/vector/TypeAliases.h>
#include <algorithm>
#include <array>
#include <cstring>
#include <string_view>
#include <type_traits>
Expand Down Expand Up @@ -450,9 +451,6 @@ struct VectorReader<Array<V>> {
childReader_{detail::decode(arrayValuesDecoder_, *vector_.elements())} {
}

explicit VectorReader(const VectorReader<Array<V>>&) = delete;
VectorReader<Array<V>>& operator=(const VectorReader<Array<V>>&) = delete;

bool isSet(size_t offset) const {
return !decoded_.isNullAt(offset);
}
Expand Down Expand Up @@ -1458,8 +1456,7 @@ struct VectorReader<Generic<T>> {
using exec_in_t = GenericView;
using exec_null_free_in_t = exec_in_t;

explicit VectorReader(const DecodedVector* decoded)
: decoded_(*decoded), base_(decoded->base()) {}
explicit VectorReader(const DecodedVector* decoded) : decoded_(*decoded) {}

explicit VectorReader(const VectorReader<Generic<T>>&) = delete;

Expand All @@ -1471,7 +1468,7 @@ struct VectorReader<Generic<T>> {

exec_in_t operator[](size_t offset) const {
auto index = decoded_.index(offset);
return GenericView{base_, index};
return GenericView{decoded_, castReaders_, castType_, index};
}

exec_null_free_in_t readNullFree(vector_size_t offset) const {
Expand Down Expand Up @@ -1509,7 +1506,11 @@ struct VectorReader<Generic<T>> {
}

const DecodedVector& decoded_;
const BaseVector* base_;

// Those two variables are mutated by the GenericView during cast operations,
// and are shared across GenericViews constructed by the reader.
mutable std::array<std::shared_ptr<void>, 3> castReaders_;
mutable TypePtr castType_ = nullptr;
};

} // namespace facebook::velox::exec
Expand Down
Loading

0 comments on commit dd36fe8

Please sign in to comment.