From f15ae0f5f5bb447f6303a6fc1c0516db765de8d7 Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Mon, 18 Apr 2022 14:36:52 -0700 Subject: [PATCH] Cast support to generic (#1417) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/1417 - This diff adds support for type access and casting to generics. - It introduces 4 functions that can be called on the GenericView: - type() - kind() - castTo<T>: performs an unchecked cast and returns arg_type<T>. A debug-time type check is performed as a safety net. - tryCastTo<T>: returns std::optional<arg_type<T>>; performs an unchecked cast, and returns std::nullopt if T does not match the type of the vector. - **Cost**: - The first time we do the cast we create the readers corresponding to that type. For subsequent rows, the cost is a couple of instructions: checking that the reader is created, accessing the reader in the variant, and returning the element at the row index. In some uncommon cases there is an additional check that the type being cast to is consistent across rows. - tryCastTo is more expensive, since it does a type check as well. - In general it's not expensive to use it with complex types, but avoid using it with primitives by either implementing a function specialized for primitive inputs (see the == function as an example), or casting to complex types already specialized with primitive types, i.e. Array<int> (or another Array of a primitive type) instead of Array<Any> and then casting Any to int for every element. - **What can be cast to?** This diff enables casting to all basic types plus Array<Any>, Map<Any, Any>, Row<Any>, Row<Any, Any> and Row<Any, ...> with up to 5 fields. This allows complex types to be traversed recursively. - **How to add a new cast-to type?** - Any type except Generic<> itself (cast to self type) or Variadic<...>. - The diff adds an example function, HasDuplicate, which checks if an array has duplicate items. 
Reviewed By: kevinwilfong Differential Revision: D35616634 fbshipit-source-id: 847af1dfc3af9b49ce32386b05bef585be955a81 --- velox/expression/ComplexViewTypes.h | 214 ++++++++- velox/expression/VectorUdfTypeSystem.h | 15 +- velox/expression/tests/GenericViewTest.cpp | 517 ++++++++++++++++++++- velox/type/Type.h | 6 + 4 files changed, 729 insertions(+), 23 deletions(-) diff --git a/velox/expression/ComplexViewTypes.h b/velox/expression/ComplexViewTypes.h index fb8b6b0475495..483dccab24814 100644 --- a/velox/expression/ComplexViewTypes.h +++ b/velox/expression/ComplexViewTypes.h @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -921,27 +922,228 @@ inline auto get(const RowView& row) { return row.template at(); } +template +using reader_ptr_t = VectorReader*; + +template +struct HasGeneric { + static constexpr bool value() { + return false; + } +}; + +template +struct HasGeneric> { + static constexpr bool value() { + return true; + } +}; + +template +struct HasGeneric> { + static constexpr bool value() { + return HasGeneric::value() || HasGeneric::value(); + } +}; + +template +struct HasGeneric> { + static constexpr bool value() { + return HasGeneric::value(); + } +}; + +template +struct HasGeneric> { + static constexpr bool value() { + return (HasGeneric::value() || ...); + } +}; + +// This is basically Array, Map, Row. 
+template +struct AllGenericExceptTop { + static constexpr bool value() { + return false; + } +}; + +template +struct AllGenericExceptTop> { + static constexpr bool value() { + return isGenericType::value; + } +}; + +template +struct AllGenericExceptTop> { + static constexpr bool value() { + return isGenericType::value && isGenericType::value; + } +}; + +template +struct AllGenericExceptTop> { + static constexpr bool value() { + return (isGenericType::value && ...); + } +}; + class GenericView { public: - GenericView(const BaseVector* vector, vector_size_t index) - : vector_(vector), index_(index) {} + GenericView( + const DecodedVector& decoded, + std::array, 3>& castReaders, + TypePtr& castType, + vector_size_t index) + : decoded_(decoded), + castReaders_(castReaders), + castType_(castType), + index_(index) {} uint64_t hash() const { - return vector_->hashValueAt(index_); + return decoded_.base()->hashValueAt(index_); } bool operator==(const GenericView& other) const { - return vector_->equalValueAt(other.vector_, index_, other.index_); + return decoded_.base()->equalValueAt( + other.decoded_.base(), index_, other.index_); } std::optional compare( const GenericView& other, const CompareFlags flags) const { - return vector_->compare(other.vector_, index_, other.index_, flags); + return decoded_.base()->compare( + other.decoded_.base(), index_, other.index_, flags); + } + + TypeKind kind() const { + return decoded_.base()->typeKind(); + } + + const TypePtr type() const { + return decoded_.base()->type(); + } + + // If conversion is invalid, behavior is undefined. However, debug time + // checks will throw an exception. 
+ template + typename VectorReader::exec_in_t castTo() const { + VELOX_DCHECK( + CastTypeChecker::check(type()), + fmt::format( + "castTo type is not compatible with type of vector, vector type is {}, casted to type is {}", + type()->toString(), + CppToType::create()->toString())); + + // TODO: We can distinguish if this is a null-free or not null-free + // generic. And based on that determine if we want to call operator[] or + // readNullFree. For now we always return nullable. + return ensureReader()->operator[](index_); + } + + template + std::optional::exec_in_t> tryCastTo() const { + if (!CastTypeChecker::check(type())) { + return std::nullopt; + } + + return ensureReader()->operator[](index_); } private: - const BaseVector* vector_; + // Utility class that checks that vectorType matches T. + template + struct CastTypeChecker { + static bool check(const TypePtr& vectorType) { + return CppToType::typeKind == vectorType->kind(); + } + }; + + template + struct CastTypeChecker> { + static bool check(const TypePtr&) { + return true; + } + }; + + template + struct CastTypeChecker> { + static bool check(const TypePtr& vectorType) { + return TypeKind::ARRAY == vectorType->kind() && + CastTypeChecker::check(vectorType->childAt(0)); + } + }; + + template + struct CastTypeChecker> { + static bool check(const TypePtr& vectorType) { + return TypeKind::MAP == vectorType->kind() && + CastTypeChecker::check(vectorType->childAt(0)) && + CastTypeChecker::check(vectorType->childAt(1)); + } + }; + + template + struct CastTypeChecker> { + static bool check(const TypePtr& vectorType) { + int index = 0; + return TypeKind::ROW == vectorType->kind() && + (CastTypeChecker::check(vectorType->childAt(index++)) && ... && + true); + } + }; + + template + VectorReader* ensureReader() const { + static_assert( + !isGenericType::value && !isVariadicType::value, + "That does not make any sense! 
You cant cast to Generic or Variadic"); + + // This is an optimization to avoid checking dynamically for every row that + // the user is always casting to the same type. + // Types are divided into three sets, for 1, and 2 we do not do the check, + // since no two types can ever refer to the same vector. + + if constexpr (!HasGeneric::value()) { + // Two types with no generic can never represent same vector. + return ensureReaderImpl(); + } else { + if constexpr (AllGenericExceptTop::value()) { + // This is basically Array, Map, Row. + return ensureReaderImpl(); + } else { + auto requestedType = CppToType::create(); + if (castType_) { + VELOX_USER_CHECK( + castType_->operator==(*requestedType), + fmt::format( + "Not allowed to cast to the two types {} and {} within the same batch." + "Consider creating a new type set to allow it.", + castType_->toString(), + requestedType->toString())); + } else { + castType_ = std::move(requestedType); + } + return ensureReaderImpl(); + } + } + } + + template + VectorReader* ensureReaderImpl() const { + auto* reader = static_cast*>(castReaders_[I].get()); + if (LIKELY(reader != nullptr)) { + return reader; + } else { + castReaders_[I] = std::make_shared>(&decoded_); + return static_cast*>(castReaders_[I].get()); + } + } + + const DecodedVector& decoded_; + std::array, 3>& castReaders_; + TypePtr& castType_; vector_size_t index_; }; diff --git a/velox/expression/VectorUdfTypeSystem.h b/velox/expression/VectorUdfTypeSystem.h index 20f0cd6a4888d..8bace04aca2b9 100644 --- a/velox/expression/VectorUdfTypeSystem.h +++ b/velox/expression/VectorUdfTypeSystem.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -450,9 +451,6 @@ struct VectorReader> { childReader_{detail::decode(arrayValuesDecoder_, *vector_.elements())} { } - explicit VectorReader(const VectorReader>&) = delete; - VectorReader>& operator=(const VectorReader>&) = delete; - bool isSet(size_t offset) const { return !decoded_.isNullAt(offset); 
} @@ -1458,8 +1456,7 @@ struct VectorReader> { using exec_in_t = GenericView; using exec_null_free_in_t = exec_in_t; - explicit VectorReader(const DecodedVector* decoded) - : decoded_(*decoded), base_(decoded->base()) {} + explicit VectorReader(const DecodedVector* decoded) : decoded_(*decoded) {} explicit VectorReader(const VectorReader>&) = delete; @@ -1471,7 +1468,7 @@ struct VectorReader> { exec_in_t operator[](size_t offset) const { auto index = decoded_.index(offset); - return GenericView{base_, index}; + return GenericView{decoded_, castReaders_, castType_, index}; } exec_null_free_in_t readNullFree(vector_size_t offset) const { @@ -1509,7 +1506,11 @@ struct VectorReader> { } const DecodedVector& decoded_; - const BaseVector* base_; + + // Those two variables are mutated by the GenericView during cast operations, + // and are shared across GenericViews constructed by the reader. + mutable std::array, 3> castReaders_; + mutable TypePtr castType_ = nullptr; }; } // namespace facebook::velox::exec diff --git a/velox/expression/tests/GenericViewTest.cpp b/velox/expression/tests/GenericViewTest.cpp index 4da33f7340f28..6feff009d0b31 100644 --- a/velox/expression/tests/GenericViewTest.cpp +++ b/velox/expression/tests/GenericViewTest.cpp @@ -106,9 +106,20 @@ class GenericViewTest : public functions::test::FunctionBaseTest { } } } + + template + void testHasGeneric(bool expected) { + ASSERT_EQ(exec::HasGeneric::value(), expected) + << CppToType::create()->toString(); + } + + template + void testAllGenericExceptTop(bool expected) { + ASSERT_EQ(exec::AllGenericExceptTop::value(), expected); + } }; -TEST_F(GenericViewTest, testInt) { +TEST_F(GenericViewTest, primitive) { std::vector> data1 = { 1, 2, std::nullopt, 1, std::nullopt, 5, 6}; std::vector> data2 = { @@ -120,7 +131,7 @@ TEST_F(GenericViewTest, testInt) { testHash(vector1); } -TEST_F(GenericViewTest, testCompare) { +TEST_F(GenericViewTest, compare) { std::vector> data = {1, 2, std::nullopt, 1}; auto vector = 
vectorMaker_.flatVectorNullable(data); @@ -139,7 +150,7 @@ TEST_F(GenericViewTest, testCompare) { } // Test reader where generic elements are arrays -TEST_F(GenericViewTest, testArrayOfInt) { +TEST_F(GenericViewTest, arrayOfInt) { auto vector1 = vectorMaker_.arrayVectorNullable(arrayData1); auto vector2 = vectorMaker_.arrayVectorNullable(arrayData2); testEqual(vector1, vector2, arrayData1, arrayData2); @@ -147,7 +158,7 @@ TEST_F(GenericViewTest, testArrayOfInt) { } // Test reader> where generic elements are ints. -TEST_F(GenericViewTest, testArrayOfGeneric) { +TEST_F(GenericViewTest, arrayOfGeneric) { auto vector1 = vectorMaker_.arrayVectorNullable(arrayData1); auto vector2 = vectorMaker_.arrayVectorNullable(arrayData2); @@ -181,9 +192,8 @@ struct CompareFunc { VELOX_DEFINE_FUNCTION_TYPES(T); template - FOLLY_ALWAYS_INLINE bool call(bool& out, const G& a, const G& b) { + void call(bool& out, const G& a, const G& b) { out = (a == b); - return true; } }; @@ -215,9 +225,8 @@ struct HashFunc { VELOX_DEFINE_FUNCTION_TYPES(T); template - FOLLY_ALWAYS_INLINE bool call(int64_t& out, const G& a, const G& b) { + void call(int64_t& out, const G& a, const G& b) { out = a.hash() + b.hash(); - return true; } }; @@ -274,12 +283,11 @@ struct HashAllArgs { VELOX_DEFINE_FUNCTION_TYPES(T); template - FOLLY_ALWAYS_INLINE bool call(int64_t& out, const G& args) { + void call(int64_t& out, const G& args) { out = 0; for (auto arg : args) { out += arg.value().hash(); } - return true; } }; @@ -335,5 +343,494 @@ TEST_F(GenericViewTest, e2eHashVariadicAnyType) { } } +TEST_F(GenericViewTest, castToInt) { + std::vector> data = {1, 2, std::nullopt, 1}; + + auto vector = vectorMaker_.flatVectorNullable(data); + DecodedVector decoded; + exec::VectorReader> reader(decode(decoded, *vector)); + ASSERT_EQ(reader[0].castTo(), 1); + ASSERT_EQ(reader[0].tryCastTo().value(), 1); + + ASSERT_EQ(reader[1].castTo(), 2); + ASSERT_EQ(reader[1].tryCastTo().value(), 2); +} + +TEST_F(GenericViewTest, 
castToArrayViewOfGeneric) { + VectorPtr vector = vectorMaker_.arrayVectorNullable(arrayData1); + + DecodedVector decoded; + exec::VectorReader> reader(decode(decoded, *vector)); + + auto generic = reader[4]; // {{0, 1, 2, 4}} + ASSERT_EQ(generic.kind(), TypeKind::ARRAY); + + // Test cast to. + { + auto arrayView = generic.castTo>>(); + auto i = 0; + for (auto genericItem : arrayView) { + if (genericItem.has_value()) { + ASSERT_EQ(genericItem.value().kind(), TypeKind::BIGINT); + auto val = genericItem.value().castTo(); + ASSERT_EQ(val, arrayData1[4].value()[i].value()); + i++; + } + } + } + + // Test try cast to. + { + auto arrayView = generic.tryCastTo>>().value(); + + auto i = 0; + for (auto genericItem : arrayView) { + if (genericItem.has_value()) { + ASSERT_EQ(genericItem.value().kind(), TypeKind::BIGINT); + + auto val = genericItem.value().tryCastTo().value(); + ASSERT_EQ(val, arrayData1[4].value()[i].value()); + i++; + } + } + } +} + +TEST_F(GenericViewTest, tryCastTo) { + DecodedVector decoded; + + { // Reader for vector of bigint. + auto vector = vectorMaker_.flatVectorNullable({1}); + exec::VectorReader> reader(decode(decoded, *vector)); + + ASSERT_FALSE(reader[0].tryCastTo().has_value()); + ASSERT_FALSE(reader[0].tryCastTo().has_value()); + ASSERT_FALSE(reader[0].tryCastTo().has_value()); + ASSERT_FALSE(reader[0].tryCastTo>>().has_value()); + ASSERT_FALSE( + (reader[0].tryCastTo, Generic<>>>().has_value())); + ASSERT_FALSE(reader[0].tryCastTo>>().has_value()); + + ASSERT_EQ(reader[0].tryCastTo().value(), 1); + } + + { // Reader for vector of array(bigint). 
+ auto arrayVector = vectorMaker_.arrayVectorNullable(arrayData1); + exec::VectorReader> reader(decode(decoded, *arrayVector)); + + ASSERT_FALSE(reader[0].tryCastTo().has_value()); + ASSERT_FALSE(reader[0].tryCastTo().has_value()); + ASSERT_FALSE(reader[0].tryCastTo().has_value()); + ASSERT_FALSE( + (reader[0].tryCastTo, Generic<>>>().has_value())); + + ASSERT_TRUE(reader[0].tryCastTo>>().has_value()); + } +} + +TEST_F(GenericViewTest, validMultiCast) { + DecodedVector decoded; + + { // Reader for vector of array(bigint). + auto arrayVector = vectorMaker_.arrayVectorNullable(arrayData1); + exec::VectorReader> reader(decode(decoded, *arrayVector)); + + ASSERT_EQ( + reader[0].tryCastTo>>().value().size(), + arrayData1[0].value().size()); + ASSERT_EQ( + reader[0].tryCastTo>().value().size(), + arrayData1[0].value().size()); + } +} + +TEST_F(GenericViewTest, invalidMultiCast) { + DecodedVector decoded; + + { // Reader for vector of map(bigint, bigint). + auto arrayVector = makeMapVector({{{1, 2}, {3, 4}}}); + exec::VectorReader> reader(decode(decoded, *arrayVector)); + + // valid. + ASSERT_EQ((reader[0].castTo, Generic<>>>().size()), 2); + // valid. + ASSERT_EQ((reader[0].castTo>().size()), 2); + // valid. + ASSERT_EQ((reader[0].castTo, int64_t>>().size()), 2); + + // Will throw since Map, int64_t> and Map> not + // allowed togother. 
+ EXPECT_THROW( + (reader[0].castTo>>().size()), VeloxUserError); + } +} + +TEST_F(GenericViewTest, castToMap) { + using map_type = std::vector>>; + + map_type map1 = {}; + map_type map2 = {{1, 4}, {3, 3}, {4, std::nullopt}}; + + std::vector mapsData = {map1, map2}; + + auto mapVector = makeMapVector(mapsData); + + DecodedVector decoded; + exec::VectorReader> reader(decode(decoded, *mapVector)); + + { + auto generic = reader[0]; + auto map = generic.tryCastTo, Generic<>>>(); + ASSERT_TRUE(map.has_value()); + auto mapView = map.value(); + ASSERT_EQ(mapView.size(), 0); + } + + { + auto generic = reader[1]; + auto mapView = generic.castTo, Generic<>>>(); + ASSERT_EQ(mapView.size(), 3); + ASSERT_EQ(mapView.begin()->first.castTo(), 1); + ASSERT_EQ(mapView.begin()->second.value().castTo(), 4); + } +} + +// A function that convert a variaidic number of inputs that can have +// any type to string written using castTo. +template +struct ToStringFuncCastTo { + VELOX_DEFINE_FUNCTION_TYPES(T); + + // Used to print nullable items of maps, rows and arrays. 
+ template + void printItem(out_type& out, const TItem& item) { + if (item.has_value()) { + print(out, *item); + } else { + out += "null"; + } + } + + template + void printMap(out_type& out, const TMapView& mapView) { + out += "map("; + for (auto [key, value] : mapView) { + out += "<"; + print(out, key); + out += ","; + printItem(out, value); + out += ">,"; + } + out += ")"; + } + + template + void printArray(out_type& out, const TArrayView& arrayView) { + out += "array("; + for (const auto& item : arrayView) { + printItem(out, item); + out += ", "; + } + out += ")"; + } + + template + void printRow(out_type& out, const TRowView& rowView) { + out += "row("; + printItem(out, exec::get<0>(rowView)); + out += ", "; + printItem(out, exec::get<1>(rowView)); + out += ")"; + } + + void print(out_type& out, const arg_type>& arg) { + // Note: VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH can be used to simplify + // iterating over all primitive types. + switch (arg.kind()) { + case TypeKind::BIGINT: + out += std::to_string(arg.template castTo()); + break; + case TypeKind::DOUBLE: + out += std::to_string(arg.template castTo()); + break; + case TypeKind::BOOLEAN: + out += std::to_string(arg.template castTo()); + break; + case TypeKind::VARCHAR: + out += arg.template castTo(); + break; + case TypeKind::ARRAY: { + if (*arg.type() == *ARRAY(BIGINT())) { + // Special handling for array usually this is faster than + // going through Array> and casting every element. 
+ auto arrayView = arg.template castTo>(); + out += "array("; + for (auto item : arrayView) { + if (item.has_value()) { + out += std::to_string(item.value()); + } else { + out += "null"; + } + out += ", "; + } + out += ")"; + } else { + auto arrayView = arg.template castTo>>(); + printArray(out, arrayView); + } + break; + } + case TypeKind::MAP: { + auto mapView = arg.template castTo, Generic<>>>(); + printMap(out, mapView); + break; + } + case TypeKind::ROW: { + auto rowSize = arg.type()->asRow().size(); + VELOX_CHECK(rowSize == 2, "print only supports rows of width 2"); + auto rowView = arg.template castTo, Generic<>>>(); + printRow(out, rowView); + break; + } + default: + VELOX_UNREACHABLE("not supported"); + } + } + + void call(out_type& out, const arg_type>>& args) { + auto i = 0; + for (const auto& arg : args) { + out += "arg " + std::to_string(i++) + " : "; + printItem(out, arg); + out += "\n"; + } + } +}; + +TEST_F(GenericViewTest, castE2E) { + registerFunction>>( + {"to_string_cast"}); + + auto test = [&](const std::string& args, const std::string& expected) { + auto result = evaluate>( + fmt::format("to_string_cast({})", args), + makeRowVector({makeFlatVector(1)})); + ASSERT_EQ(result->valueAt(0).str(), expected); + }; + test("row_constructor(1,2)", "arg 0 : row(1, 2)\n"); + + test( + "row_constructor(array_constructor(1,2,3),true)", + "arg 0 : row(array(1, 2, 3, ), 1)\n"); + + test( + "'hi', array_constructor(array_constructor(1.2, 1.4)), 1", + "arg 0 : hi\narg 1 : array(array(1.200000, 1.400000, ), )\narg 2 : 1\n"); + + test( + "1.3, map(array_constructor(1), array_constructor(null))", + "arg 0 : 1.300000\narg 1 : map(<1,null>,)\n"); +} + +// A function that convert a variaidic number of inputs that can have +// any type to string written using tryCastTo. +template +struct ToStringFuncTryCastTo { + VELOX_DEFINE_FUNCTION_TYPES(T); + + // Uses to print nullable items of maps, rows and arrays. 
+ template + void printItem(out_type& out, const TItem& item) { + if (item.has_value()) { + print(out, *item); + } else { + out += "null"; + } + } + + template + void printMap(out_type& out, const TMapView& mapView) { + out += "map("; + for (auto [key, value] : mapView) { + out += "<"; + print(out, key); + out += ","; + printItem(out, value); + out += ">,"; + } + out += ")"; + } + + template + void printArray(out_type& out, const TArrayView& arrayView) { + out += "array("; + for (const auto& item : arrayView) { + printItem(out, item); + out += ", "; + } + out += ")"; + } + + template + void printRow(out_type& out, const TRowView& rowView) { + out += "row("; + printItem(out, exec::get<0>(rowView)); + out += ", "; + printItem(out, exec::get<1>(rowView)); + out += ")"; + } + + void print(out_type& out, const arg_type>& arg) { + if (auto bigIntValue = arg.template tryCastTo()) { + out += std::to_string(*bigIntValue); + } else if (auto doubleValue = arg.template tryCastTo()) { + out += std::to_string(*doubleValue); + } else if (auto boolValue = arg.template tryCastTo()) { + out += std::to_string(*boolValue); + } else if (auto arrayView = arg.template tryCastTo>()) { + // Special handling for array usually this is faster than going + // through Array> and casting every element. 
+ out += "array("; + for (auto item : *arrayView) { + if (item.has_value()) { + out += std::to_string(item.value()); + } else { + out += "null"; + } + out += ", "; + } + out += ")"; + } else if (auto arrayView = arg.template tryCastTo>>()) { + printArray(out, *arrayView); + } else if ( + auto mapView = arg.template tryCastTo, Generic<>>>()) { + printMap(out, *mapView); + } else if (auto stringView = arg.template tryCastTo()) { + out += *stringView; + } else if ( + auto rowView = arg.template tryCastTo, Generic<>>>()) { + printRow(out, *rowView); + } else { + VELOX_UNREACHABLE("type not supported in this function"); + } + } + + void call(out_type& out, const arg_type>>& args) { + auto i = 0; + for (const auto& arg : args) { + out += "arg " + std::to_string(i++) + " : "; + printItem(out, arg); + out += "\n"; + } + } +}; + +TEST_F(GenericViewTest, tryCastE2E) { + registerFunction>>( + {"to_string_try_cast"}); + + auto test = [&](const std::string& args, const std::string& expected) { + auto result = evaluate>( + fmt::format("to_string_try_cast({})", args), + makeRowVector({makeFlatVector(1)})); + ASSERT_EQ(result->valueAt(0).str(), expected); + }; + test("row_constructor(1,2)", "arg 0 : row(1, 2)\n"); + + test( + "row_constructor(array_constructor(1,2,3),true)", + "arg 0 : row(array(1, 2, 3, ), 1)\n"); + + test( + "'hi', array_constructor(array_constructor(1.2, 1.4)), 1", + "arg 0 : hi\narg 1 : array(array(1.200000, 1.400000, ), )\narg 2 : 1\n"); + + test( + "1.3, map(array_constructor(1), array_constructor(null))", + "arg 0 : 1.300000\narg 1 : map(<1,null>,)\n"); +} + +template +struct ArrayHasDuplicateFunc { + VELOX_DEFINE_FUNCTION_TYPES(T); + bool call(bool& out, const arg_type>>& input) { + std::unordered_set>> set; + for (auto item : input) { + if (!item.has_value()) { + // Return null if null is encountered. + return false; + } + + if (set.count(*item)) { + // Item already exisits. 
+ out = true; + return true; + } + set.insert(*item); + } + out = false; + return true; + } +}; + +TEST_F(GenericViewTest, hasDuplicate) { + registerFunction>>( + {"has_duplicate_func"}); + + auto test = [&](const std::string& arg, bool expected) { + auto result = evaluate>( + fmt::format("has_duplicate_func(array_constructor({}))", arg), + makeRowVector({makeFlatVector(1)})); + ASSERT_EQ(result->valueAt(0), expected); + }; + + test("1,2,3,4,5", false); + test("1,2,3,4,4", true); + + test("'what','no'", false); + test("'what','what'", true); + + // Nested array. + test( + "array_constructor(1,2,3),array_constructor(1,2), array_constructor(1)", + false); + test( + "array_constructor(1,2,3),array_constructor(1), array_constructor(1)", + true); +} + +TEST_F(GenericViewTest, hasGenericTest) { + testHasGeneric(false); + testHasGeneric>(false); + testHasGeneric>(false); + testHasGeneric>(false); + testHasGeneric>(false); + + testHasGeneric>(true); + testHasGeneric>>(true); + testHasGeneric>>(true); + testHasGeneric, int64_t>>(true); + testHasGeneric, int64_t>>(true); + testHasGeneric>>(true); +} + +TEST_F(GenericViewTest, allGenericExceptTop) { + testAllGenericExceptTop(false); + testAllGenericExceptTop>(false); + testAllGenericExceptTop>(false); + testAllGenericExceptTop>(false); + testAllGenericExceptTop>(false); + testAllGenericExceptTop>>(false); + testAllGenericExceptTop, int64_t>>(false); + testAllGenericExceptTop, int64_t>>(false); + testAllGenericExceptTop>>(false); + + testAllGenericExceptTop>>(true); + testAllGenericExceptTop, Generic<>>>(true); + testAllGenericExceptTop, Generic<>>>(true); + testAllGenericExceptTop>(true); + testAllGenericExceptTop>>(true); +} + } // namespace } // namespace facebook::velox diff --git a/velox/type/Type.h b/velox/type/Type.h index 8c105f4a63cf6..6fa79270f3f4b 100644 --- a/velox/type/Type.h +++ b/velox/type/Type.h @@ -1307,6 +1307,12 @@ struct isVariadicType : public std::false_type {}; template struct isVariadicType> : public 
std::true_type {}; +template +struct isGenericType : public std::false_type {}; + +template +struct isGenericType> : public std::true_type {}; + template struct Map { using key_type = KEY;