From e8aea05ecf5ee93b4f62c9365b1a93eb117f0fe9 Mon Sep 17 00:00:00 2001 From: Pedro Eugenio Rocha Pedreira Date: Fri, 3 May 2024 17:53:59 -0700 Subject: [PATCH] Improve evaluateOnce() helper function Summary: Improving the evaluateOnce() helper function to allow for more flexibility when specifying input logical types. Also deprecating an old overload of the function that only allowed multiple parameters with they had the same type. . For a lot more details, check the header documentation added. Differential Revision: D56962773 --- .../prestosql/tests/BinaryFunctionsTest.cpp | 62 +++---- .../prestosql/tests/DateTimeFunctionsTest.cpp | 72 ++++---- .../prestosql/tests/FromUtf8Test.cpp | 3 +- .../tests/HyperLogLogFunctionsTest.cpp | 10 +- .../prestosql/tests/utils/FunctionBaseTest.h | 155 ++++++++++++++---- .../sparksql/tests/ArithmeticTest.cpp | 10 +- .../sparksql/tests/DateTimeFunctionsTest.cpp | 72 ++++---- .../sparksql/tests/LeastGreatestTest.cpp | 26 +-- .../sparksql/tests/MakeTimestampTest.cpp | 25 ++- velox/functions/sparksql/tests/StringTest.cpp | 12 +- 10 files changed, 262 insertions(+), 185 deletions(-) diff --git a/velox/functions/prestosql/tests/BinaryFunctionsTest.cpp b/velox/functions/prestosql/tests/BinaryFunctionsTest.cpp index e690674ba431b..eb939f6a9a8d6 100644 --- a/velox/functions/prestosql/tests/BinaryFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/BinaryFunctionsTest.cpp @@ -44,8 +44,7 @@ class BinaryFunctionsTest : public FunctionBaseTest {}; TEST_F(BinaryFunctionsTest, md5) { const auto md5 = [&](std::optional arg) { - return evaluateOnce( - "md5(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("md5(c0)", VARBINARY(), arg); }; EXPECT_EQ(hexToDec("533f6357e0210e67d91f651bc49e1278"), md5("hashme")); @@ -57,8 +56,7 @@ TEST_F(BinaryFunctionsTest, md5) { TEST_F(BinaryFunctionsTest, sha1) { const auto sha1 = [&](std::optional arg) { - return evaluateOnce( - "sha1(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("sha1(c0)", VARBINARY(), arg); }; // The result values were obtained from Presto Java sha1 function. @@ -87,8 +85,7 @@ TEST_F(BinaryFunctionsTest, sha1) { TEST_F(BinaryFunctionsTest, sha256) { const auto sha256 = [&](std::optional arg) { - return evaluateOnce( - "sha256(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("sha256(c0)", VARBINARY(), arg); }; EXPECT_EQ( @@ -109,8 +106,7 @@ TEST_F(BinaryFunctionsTest, sha256) { TEST_F(BinaryFunctionsTest, sha512) { const auto sha512 = [&](std::optional arg) { - return evaluateOnce( - "sha512(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("sha512(c0)", VARBINARY(), arg); }; EXPECT_EQ( @@ -134,8 +130,7 @@ TEST_F(BinaryFunctionsTest, sha512) { TEST_F(BinaryFunctionsTest, spookyHashV232) { const auto spookyHashV232 = [&](std::optional arg) { - return evaluateOnce( - "spooky_hash_v2_32(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("spooky_hash_v2_32(c0)", VARBINARY(), arg); }; // The result values were obtained from Presto Java spooky_hash_v2_32 @@ -154,8 +149,7 @@ TEST_F(BinaryFunctionsTest, spookyHashV232) { TEST_F(BinaryFunctionsTest, spookyHashV264) { const auto spookyHashV264 = [&](std::optional arg) { - return evaluateOnce( - "spooky_hash_v2_64(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("spooky_hash_v2_64(c0)", VARBINARY(), arg); }; // The result values were obtained from Presto Java spooky_hash_v2_64 @@ -177,8 +171,8 @@ TEST_F(BinaryFunctionsTest, spookyHashV264) { TEST_F(BinaryFunctionsTest, HmacSha1) { const auto hmacSha1 = [&](std::optional arg, std::optional key) { - return evaluateOnce( - "hmac_sha1(c0, c1)", {arg, key}, {VARBINARY(), VARBINARY()}); + return evaluateOnce( + "hmac_sha1(c0, c1)", {VARBINARY(), VARBINARY()}, arg, key); }; // Use python hmac lib results as the expected value. // >>> import hmac @@ -216,8 +210,8 @@ TEST_F(BinaryFunctionsTest, HmacSha1) { TEST_F(BinaryFunctionsTest, HmacSha256) { const auto hmacSha256 = [&](std::optional arg, std::optional key) { - return evaluateOnce( - "hmac_sha256(c0, c1)", {arg, key}, {VARBINARY(), VARBINARY()}); + return evaluateOnce( + "hmac_sha256(c0, c1)", {VARBINARY(), VARBINARY()}, arg, key); }; // Use python hmac lib results as the expected value. // >>> import hmac @@ -247,8 +241,8 @@ TEST_F(BinaryFunctionsTest, HmacSha256) { TEST_F(BinaryFunctionsTest, HmacSha512) { const auto hmacSha512 = [&](std::optional arg, std::optional key) { - return evaluateOnce( - "hmac_sha512(c0, c1)", {arg, key}, {VARBINARY(), VARBINARY()}); + return evaluateOnce( + "hmac_sha512(c0, c1)", {VARBINARY(), VARBINARY()}, arg, key); }; // Use the same expected value from TestVarbinaryFunctions of presto java EXPECT_EQ( @@ -265,8 +259,8 @@ TEST_F(BinaryFunctionsTest, HmacSha512) { TEST_F(BinaryFunctionsTest, HmacMd5) { const auto hmacMd5 = [&](std::optional arg, std::optional key) { - return evaluateOnce( - "hmac_md5(c0, c1)", {arg, key}, {VARBINARY(), VARBINARY()}); + return evaluateOnce( + "hmac_md5(c0, c1)", {VARBINARY(), VARBINARY()}, arg, key); }; // The result values were obtained from Presto Java hmac_md5 function. EXPECT_EQ( @@ -280,8 +274,7 @@ TEST_F(BinaryFunctionsTest, HmacMd5) { TEST_F(BinaryFunctionsTest, crc32) { const auto crc32 = [&](std::optional value) { - return evaluateOnce( - "crc32(c0)", {value}, {VARBINARY()}); + return evaluateOnce("crc32(c0)", VARBINARY(), value); }; // use python3 zlib result as the expected values, // >>> import zlib @@ -300,8 +293,7 @@ TEST_F(BinaryFunctionsTest, crc32) { TEST_F(BinaryFunctionsTest, xxhash64) { const auto xxhash64 = [&](std::optional value) { - return evaluateOnce( - "xxhash64(c0)", {value}, {VARBINARY()}); + return evaluateOnce("xxhash64(c0)", VARBINARY(), value); }; const auto toVarbinary = [](const int64_t input) { @@ -464,8 +456,7 @@ TEST_F(BinaryFunctionsTest, fromBase64Url) { TEST_F(BinaryFunctionsTest, fromBigEndian32) { const auto fromBigEndian32 = [&](const std::optional& arg) { - return evaluateOnce( - "from_big_endian_32(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("from_big_endian_32(c0)", VARBINARY(), arg); }; EXPECT_EQ(std::nullopt, fromBigEndian32(std::nullopt)); @@ -492,8 +483,7 @@ TEST_F(BinaryFunctionsTest, fromBigEndian32) { TEST_F(BinaryFunctionsTest, toBigEndian32) { const auto toBigEndian32 = [&](const std::optional& arg) { - return evaluateOnce( - "to_big_endian_32(c0)", {arg}, {INTEGER()}); + return evaluateOnce("to_big_endian_32(c0)", arg); }; EXPECT_EQ(std::nullopt, toBigEndian32(std::nullopt)); @@ -513,8 +503,7 @@ TEST_F(BinaryFunctionsTest, toBigEndian32) { TEST_F(BinaryFunctionsTest, fromBigEndian64) { const auto fromBigEndian64 = [&](const std::optional& arg) { - return evaluateOnce( - "from_big_endian_64(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("from_big_endian_64(c0)", VARBINARY(), arg); }; EXPECT_EQ(std::nullopt, fromBigEndian64(std::nullopt)); @@ -554,8 +543,7 @@ TEST_F(BinaryFunctionsTest, fromBigEndian64) { TEST_F(BinaryFunctionsTest, toBigEndian64) { const auto toBigEndian64 = [&](const std::optional& arg) { - return evaluateOnce( - "to_big_endian_64(c0)", {arg}, {BIGINT()}); + return evaluateOnce("to_big_endian_64(c0)", arg); }; EXPECT_EQ(std::nullopt, toBigEndian64(std::nullopt)); @@ -633,12 +621,11 @@ TEST_F(BinaryFunctionsTest, toIEEE754Bits64) { TEST_F(BinaryFunctionsTest, fromIEEE754Bits64) { const auto fromIEEE754Bits64 = [&](const std::optional& arg) { - return evaluateOnce( - "from_ieee754_64(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("from_ieee754_64(c0)", VARBINARY(), arg); }; const auto toIEEE754Bits64 = [&](std::optional arg) { - return evaluateOnce("to_ieee754_64(c0)", arg); + return evaluateOnce("to_ieee754_64(c0)", arg); }; EXPECT_EQ(std::nullopt, fromIEEE754Bits64(std::nullopt)); @@ -701,12 +688,11 @@ TEST_F(BinaryFunctionsTest, toIEEE754Bits32) { TEST_F(BinaryFunctionsTest, fromIEEE754Bits32) { const auto fromIEEE754Bits32 = [&](const std::optional& arg) { - return evaluateOnce( - "from_ieee754_32(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("from_ieee754_32(c0)", VARBINARY(), arg); }; const auto toIEEE754Bits32 = [&](std::optional arg) { - return evaluateOnce("to_ieee754_32(c0)", arg); + return evaluateOnce("to_ieee754_32(c0)", arg); }; EXPECT_EQ(std::nullopt, fromIEEE754Bits32(std::nullopt)); diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 425cfd4879bb9..6326dba8bb2a7 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -460,7 +460,7 @@ TEST_F(DateTimeFunctionsTest, year) { TEST_F(DateTimeFunctionsTest, yearDate) { const auto year = [&](std::optional date) { - return evaluateOnce("year(c0)", {date}, {DATE()}); + return evaluateOnce("year(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, year(std::nullopt)); EXPECT_EQ(1970, year(DATE()->toDays("1970-01-01"))); @@ -501,11 +501,9 @@ TEST_F(DateTimeFunctionsTest, yearTimestampWithTimezone) { TEST_F(DateTimeFunctionsTest, weekDate) { const auto weekDate = [&](const char* dateString) { auto date = std::make_optional(parseDate(dateString)); - auto week = - evaluateOnce("week(c0)", {date}, {DATE()}).value(); + auto week = evaluateOnce("week(c0)", DATE(), date).value(); auto weekOfYear = - evaluateOnce("week_of_year(c0)", {date}, {DATE()}) - .value(); + evaluateOnce("week_of_year(c0)", DATE(), date).value(); VELOX_CHECK_EQ( week, weekOfYear, "week and week_of_year must return the same value"); return week; @@ -596,7 +594,7 @@ TEST_F(DateTimeFunctionsTest, quarter) { TEST_F(DateTimeFunctionsTest, quarterDate) { const auto quarter = [&](std::optional date) { - return evaluateOnce("quarter(c0)", {date}, {DATE()}); + return evaluateOnce("quarter(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, quarter(std::nullopt)); EXPECT_EQ(1, quarter(0)); @@ -662,7 +660,7 @@ TEST_F(DateTimeFunctionsTest, month) { TEST_F(DateTimeFunctionsTest, monthDate) { const auto month = [&](std::optional date) { - return evaluateOnce("month(c0)", {date}, {DATE()}); + return evaluateOnce("month(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, month(std::nullopt)); EXPECT_EQ(1, month(0)); @@ -767,7 +765,7 @@ TEST_F(DateTimeFunctionsTest, hourTimestampWithTimezone) { TEST_F(DateTimeFunctionsTest, hourDate) { const auto hour = [&](std::optional date) { - return evaluateOnce("hour(c0)", {date}, {DATE()}); + return evaluateOnce("hour(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, hour(std::nullopt)); EXPECT_EQ(0, hour(0)); @@ -803,7 +801,7 @@ TEST_F(DateTimeFunctionsTest, dayOfMonth) { TEST_F(DateTimeFunctionsTest, dayOfMonthDate) { const auto day = [&](std::optional date) { - return evaluateOnce("day_of_month(c0)", {date}, {DATE()}); + return evaluateOnce("day_of_month(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, day(std::nullopt)); EXPECT_EQ(1, day(0)); @@ -815,12 +813,12 @@ TEST_F(DateTimeFunctionsTest, dayOfMonthDate) { } TEST_F(DateTimeFunctionsTest, dayOfMonthInterval) { - const auto day = [&](int64_t millis) { - auto result = evaluateOnce( - "day_of_month(c0)", {millis}, {INTERVAL_DAY_TIME()}); + const auto day = [&](std::optional millis) { + auto result = + evaluateOnce("day_of_month(c0)", INTERVAL_DAY_TIME(), millis); - auto result2 = evaluateOnce( - "day(c0)", {millis}, {INTERVAL_DAY_TIME()}); + auto result2 = + evaluateOnce("day(c0)", INTERVAL_DAY_TIME(), millis); EXPECT_EQ(result, result2); return result; @@ -1132,7 +1130,7 @@ TEST_F(DateTimeFunctionsTest, dayOfWeek) { TEST_F(DateTimeFunctionsTest, dayOfWeekDate) { const auto day = [&](std::optional date) { - return evaluateOnce("day_of_week(c0)", {date}, {DATE()}); + return evaluateOnce("day_of_week(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, day(std::nullopt)); EXPECT_EQ(4, day(0)); @@ -1199,7 +1197,7 @@ TEST_F(DateTimeFunctionsTest, dayOfYear) { TEST_F(DateTimeFunctionsTest, dayOfYearDate) { const auto day = [&](std::optional date) { - return evaluateOnce("day_of_year(c0)", {date}, {DATE()}); + return evaluateOnce("day_of_year(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, day(std::nullopt)); EXPECT_EQ(1, day(0)); @@ -1270,7 +1268,7 @@ TEST_F(DateTimeFunctionsTest, yearOfWeek) { TEST_F(DateTimeFunctionsTest, yearOfWeekDate) { const auto yow = [&](std::optional date) { - return evaluateOnce("year_of_week(c0)", {date}, {DATE()}); + return evaluateOnce("year_of_week(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, yow(std::nullopt)); EXPECT_EQ(1970, yow(0)); @@ -1339,7 +1337,7 @@ TEST_F(DateTimeFunctionsTest, minute) { TEST_F(DateTimeFunctionsTest, minuteDate) { const auto minute = [&](std::optional date) { - return evaluateOnce("minute(c0)", {date}, {DATE()}); + return evaluateOnce("minute(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, minute(std::nullopt)); EXPECT_EQ(0, minute(0)); @@ -1403,7 +1401,7 @@ TEST_F(DateTimeFunctionsTest, second) { TEST_F(DateTimeFunctionsTest, secondDate) { const auto second = [&](std::optional date) { - return evaluateOnce("second(c0)", {date}, {DATE()}); + return evaluateOnce("second(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, second(std::nullopt)); EXPECT_EQ(0, second(0)); @@ -1458,7 +1456,7 @@ TEST_F(DateTimeFunctionsTest, millisecond) { TEST_F(DateTimeFunctionsTest, millisecondDate) { const auto millisecond = [&](std::optional date) { - return evaluateOnce("millisecond(c0)", {date}, {DATE()}); + return evaluateOnce("millisecond(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, millisecond(std::nullopt)); EXPECT_EQ(0, millisecond(0)); @@ -1569,8 +1567,8 @@ TEST_F(DateTimeFunctionsTest, dateTrunc) { TEST_F(DateTimeFunctionsTest, dateTruncDate) { const auto dateTrunc = [&](const std::string& unit, std::optional date) { - return evaluateOnce( - fmt::format("date_trunc('{}', c0)", unit), {date}, {DATE()}); + return evaluateOnce( + fmt::format("date_trunc('{}', c0)", unit), DATE(), date); }; EXPECT_EQ(std::nullopt, dateTrunc("year", std::nullopt)); @@ -1606,8 +1604,8 @@ TEST_F(DateTimeFunctionsTest, dateTruncDate) { TEST_F(DateTimeFunctionsTest, dateTruncDateForWeek) { const auto dateTrunc = [&](const std::string& unit, std::optional date) { - return evaluateOnce( - fmt::format("date_trunc('{}', c0)", unit), {date}, {DATE()}); + return evaluateOnce( + fmt::format("date_trunc('{}', c0)", unit), DATE(), date); }; // Date(19576) is 2023-08-07, which is Monday, should return Monday @@ -1852,10 +1850,11 @@ TEST_F(DateTimeFunctionsTest, dateAddDate) { const auto dateAdd = [&](const std::string& unit, std::optional value, std::optional date) { - return evaluateOnce( + return evaluateOnce( fmt::format("date_add('{}', c0, c1)", unit), - {value, date}, - {INTEGER(), DATE()}); + {INTEGER(), DATE()}, + value, + date); }; // Check null behaviors @@ -2269,10 +2268,11 @@ TEST_F(DateTimeFunctionsTest, dateDiffDate) { const auto dateDiff = [&](const std::string& unit, std::optional date1, std::optional date2) { - return evaluateOnce( + return evaluateOnce( fmt::format("date_diff('{}', c0, c1)", unit), - {date1, date2}, - {DATE(), DATE()}); + {DATE(), DATE()}, + date1, + date2); }; // Check null behaviors @@ -3844,8 +3844,7 @@ TEST_F(DateTimeFunctionsTest, timestampWithTimezoneComparisons) { TEST_F(DateTimeFunctionsTest, castDateToTimestamp) { const int64_t kSecondsInDay = kMillisInDay / 1'000; const auto castDateToTimestamp = [&](const std::optional date) { - return evaluateOnce( - "cast(c0 AS timestamp)", {date}, {DATE()}); + return evaluateOnce("cast(c0 AS timestamp)", DATE(), date); }; EXPECT_EQ(Timestamp(0, 0), castDateToTimestamp(DATE()->toDays("1970-01-01"))); @@ -3908,8 +3907,7 @@ TEST_F(DateTimeFunctionsTest, castDateToTimestamp) { TEST_F(DateTimeFunctionsTest, lastDayOfMonthDate) { const auto lastDayFunc = [&](const std::optional date) { - return evaluateOnce( - "last_day_of_month(c0)", {date}, {DATE()}); + return evaluateOnce("last_day_of_month(c0)", DATE(), date); }; const auto lastDay = [&](const StringView& dateStr) { @@ -4010,8 +4008,10 @@ TEST_F(DateTimeFunctionsTest, fromUnixtimeDouble) { TEST_F(DateTimeFunctionsTest, toISO8601Date) { const auto toISO8601 = [&](const char* dateString) { - return evaluateOnce( - "to_iso8601(c0)", {DATE()->toDays(dateString)}, {DATE()}); + return evaluateOnce( + "to_iso8601(c0)", + DATE(), + std::make_optional(DATE()->toDays(dateString))); }; EXPECT_EQ("1970-01-01", toISO8601("1970-01-01")); diff --git a/velox/functions/prestosql/tests/FromUtf8Test.cpp b/velox/functions/prestosql/tests/FromUtf8Test.cpp index 79479b1b478db..b088e9c99ed60 100644 --- a/velox/functions/prestosql/tests/FromUtf8Test.cpp +++ b/velox/functions/prestosql/tests/FromUtf8Test.cpp @@ -23,8 +23,7 @@ namespace { class FromUtf8Test : public test::FunctionBaseTest { protected: std::optional fromUtf8(std::optional value) { - return evaluateOnce( - "from_utf8(c0)", {value}, {VARBINARY()}); + return evaluateOnce("from_utf8(c0)", VARBINARY(), value); } std::optional fromUtf8( diff --git a/velox/functions/prestosql/tests/HyperLogLogFunctionsTest.cpp b/velox/functions/prestosql/tests/HyperLogLogFunctionsTest.cpp index 414bcbb07a786..cf729881f5854 100644 --- a/velox/functions/prestosql/tests/HyperLogLogFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/HyperLogLogFunctionsTest.cpp @@ -71,9 +71,8 @@ TEST_F(HyperLogLogFunctionsTest, emptyApproxSetSignatures) { } TEST_F(HyperLogLogFunctionsTest, cardinalitySparse) { - const auto cardinality = [&](const std::string& input) { - return evaluateOnce( - "cardinality(c0)", {StringView(input)}, {HYPERLOGLOG()}); + const auto cardinality = [&](const std::optional& input) { + return evaluateOnce("cardinality(c0)", HYPERLOGLOG(), input); }; SparseHll sparseHll{&allocator_}; @@ -86,9 +85,8 @@ TEST_F(HyperLogLogFunctionsTest, cardinalitySparse) { } TEST_F(HyperLogLogFunctionsTest, cardinalityDense) { - const auto cardinality = [&](const std::string& input) { - return evaluateOnce( - "cardinality(c0)", {StringView(input)}, {HYPERLOGLOG()}); + const auto cardinality = [&](const std::optional& input) { + return evaluateOnce("cardinality(c0)", HYPERLOGLOG(), input); }; DenseHll denseHll{12, &allocator_}; diff --git a/velox/functions/prestosql/tests/utils/FunctionBaseTest.h b/velox/functions/prestosql/tests/utils/FunctionBaseTest.h index 8cedaec8e7d77..c8c28373addd2 100644 --- a/velox/functions/prestosql/tests/utils/FunctionBaseTest.h +++ b/velox/functions/prestosql/tests/utils/FunctionBaseTest.h @@ -16,6 +16,7 @@ #pragma once #include +#include #include "velox/expression/Expr.h" #include "velox/parse/Expressions.h" @@ -155,53 +156,109 @@ class FunctionBaseTest : public testing::Test, return std::dynamic_pointer_cast(results[0]); } - // Evaluate the given expression once, returning the result as a std::optional - // C++ value. Arguments should be referenced using c0, c1, .. cn. Supports - // integers, floats, booleans, and strings. - // - // Example: - // std::optional exp(std::optional a) { - // return evaluateOnce("exp(c0)", a); - // } - // EXPECT_EQ(1, exp(0)); - // EXPECT_EQ(std::nullopt, exp(std::nullopt)); - template - std::optional evaluateOnce( + /// Evaluate a given expression over a single row of input returning the + /// result as a std::optional C++ value. Prefer to use the `evaluateOnce()` + /// helper methods below when testing simple functions instead of manually + /// handling input and output vectors. + /// + /// Arguments should be referenced using c0, c1, .. cn. Supports integers, + /// floats, booleans, strings, timestamps, and other primitive types. + /// + /// The template parameter type `TReturn` is mandatory and used to cast the + /// returned value. Always use the C++ type for the returned value (e.g. + /// `double` not `DOUBLE()`. If the function returns a custom type, use the + /// physical type that represent the custom type (e.g. `CustomType::type`). + /// For example, use `int64_t` to return TIMESTAMP_WITH_TIME_ZONE() + /// + /// Input parameters and output should always wrapped in an std::optional to + /// allow the representation of null values. + /// + /// Example: + /// + /// std::optional exp(std::optional a) { + /// return evaluateOnce("exp(c0)", a); + /// } + /// EXPECT_EQ(1, exp(0)); + /// EXPECT_EQ(std::nullopt, exp(std::nullopt)); + /// + /// + /// Multiple parameters are supported: + /// + /// std::optional xxhash64( + /// std::optional val, + /// std::optional seed) { + /// return evaluateOnce("fb_xxhash64(c0, c1)", val, seed); + /// } + /// + /// + /// If not specified, input logical types are derived from the C++ type + /// provided as input using CppToType. In the example above, the `std::string` + /// C++ type will be translated to `VARCHAR` logical type, and `int64_t` to + /// `BIGINT`. + /// + /// To override the input logical type, use: + /// + /// std::optional hour(std::optional date) { + /// return evaluateOnce("hour(c0)", DATE(), date); + /// } + /// + /// Custom types like TIMESTAMP_WITH_TIMEZONE() and HYPERLOGLOG() are also + /// supported. For multiple parameters, use a list of logical types: + /// + /// return evaluateOnce( + /// "hmac_sha1(c0, c1)", {VARBINARY(), VARBINARY()}, arg, key); + /// + /// + /// One can also manually specify additional template parameters, in case + /// there are problems with template type deduction: + /// + /// return evaluateOnce( + /// "fb_xxhash64(c0, c1)", val, seed); + /// + template + std::optional evaluateOnce( const std::string& expr, - const std::optional&... args) { - return evaluateOnce( + const std::initializer_list& types, + std::optional... args) { + return evaluateOnce( expr, - makeRowVector(std::vector{ - makeNullableFlatVector(std::vector{args})...})); + makeRowVector(unpackEvaluateParams...>( + std::vector{types}, + std::forward>(std::move(args))...))); } - template - std::optional evaluateOnce( + template + std::optional evaluateOnce( const std::string& expr, - const std::vector>& args, - const std::vector& types, - const std::optional& rows = std::nullopt, - const TypePtr& resultType = nullptr) { - std::vector flatVectors; - for (vector_size_t i = 0; i < args.size(); ++i) { - flatVectors.emplace_back(makeNullableFlatVector( - std::vector>{args[i]}, types[i])); - } - auto rowVectorPtr = makeRowVector(flatVectors); - return evaluateOnce(expr, rowVectorPtr, rows, resultType); + std::optional... args) { + return evaluateOnce( + expr, + makeRowVector(unpackEvaluateParams...>( + {}, std::forward>(std::move(args))...))); } - template - std::optional evaluateOnce( + template + std::optional evaluateOnce( + const std::string& expr, + const TypePtr& type, + std::optional... args) { + return evaluateOnce( + expr, + makeRowVector(unpackEvaluateParams...>( + {type}, std::forward>(std::move(args))...))); + } + + template + std::optional evaluateOnce( const std::string& expr, const RowVectorPtr rowVectorPtr, const std::optional& rows = std::nullopt, const TypePtr& resultType = nullptr) { auto result = - evaluate>>( + evaluate>>( expr, rowVectorPtr, rows, resultType); - return result->isNullAt(0) ? std::optional{} - : ReturnType(result->valueAt(0)); + return result->isNullAt(0) ? std::optional{} + : TReturn(result->valueAt(0)); } core::TypedExprPtr parseExpression( @@ -302,6 +359,36 @@ class FunctionBaseTest : public testing::Test, ExprSet exprSet({typedExpr}, &execCtx_); return evaluate(exprSet, data, rows); } + + // Unpack parameters for evaluateOnce(). Base recursion case. + template + std::vector unpackEvaluateParams( + const std::vector& types) { + VELOX_CHECK(types.empty(), "Wrong number of types passed to evaluateOnce."); + return {}; + } + + // Recursively unpack input values and types into vectors. + template + std::vector unpackEvaluateParams( + const std::vector& types, + T&& value, + TArgs&&... args) { + // If there are no input types, let makeNullable figure it out. + auto output = std::vector{ + types.empty() + ? makeNullableFlatVector(std::vector{value}) + : makeNullableFlatVector(std::vector{value}, types.front()), + }; + + // Recurse starting from the second parameter. + auto result = unpackEvaluateParams( + types.size() > 1 ? std::vector{types.begin() + 1, types.end()} + : std::vector{}, + std::forward(std::move(args))...); + output.insert(output.end(), result.begin(), result.end()); + return output; + } }; } // namespace facebook::velox::functions::test diff --git a/velox/functions/sparksql/tests/ArithmeticTest.cpp b/velox/functions/sparksql/tests/ArithmeticTest.cpp index 07389a5a65f38..3f3834d691c73 100644 --- a/velox/functions/sparksql/tests/ArithmeticTest.cpp +++ b/velox/functions/sparksql/tests/ArithmeticTest.cpp @@ -291,11 +291,11 @@ class CeilFloorTest : public SparkFunctionBaseTest { protected: template std::optional ceil(std::optional a) { - return evaluateOnce("ceil(c0)", a); + return evaluateOnce("ceil(c0)", a); } template std::optional floor(std::optional a) { - return evaluateOnce("floor(c0)", a); + return evaluateOnce("floor(c0)", a); } }; @@ -356,7 +356,7 @@ TEST_F(ArithmeticTest, log1p) { class BinTest : public SparkFunctionBaseTest { protected: std::optional bin(std::optional arg) { - return evaluateOnce("bin(c0)", {arg}, {BIGINT()}); + return evaluateOnce("bin(c0)", arg); } }; @@ -370,8 +370,8 @@ TEST_F(BinTest, bin) { bin(std::numeric_limits::max()), "111111111111111111111111111111111111111111111111111111111111111"); EXPECT_EQ(bin(0), "0"); - auto result = evaluateOnce( - "bin(row_constructor(c0).c1)", {13}, {BIGINT()}); + auto result = evaluateOnce( + "bin(row_constructor(c0).c1)", std::make_optional(13L)); EXPECT_EQ(result, "1101"); } diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index cc46ba3d0e3ed..131143df3126a 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -178,7 +178,7 @@ TEST_F(DateTimeFunctionsTest, year) { TEST_F(DateTimeFunctionsTest, yearDate) { const auto year = [&](std::optional date) { - return evaluateOnce("year(c0)", {date}, {DATE()}); + return evaluateOnce("year(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, year(std::nullopt)); EXPECT_EQ(1970, year(DATE()->toDays("1970-05-05"))); @@ -190,8 +190,7 @@ TEST_F(DateTimeFunctionsTest, yearDate) { TEST_F(DateTimeFunctionsTest, weekOfYear) { const auto weekOfYear = [&](const char* dateString) { auto date = std::make_optional(parseDate(dateString)); - return evaluateOnce("week_of_year(c0)", {date}, {DATE()}) - .value(); + return evaluateOnce("week_of_year(c0)", DATE(), date).value(); }; EXPECT_EQ(1, weekOfYear("1919-12-31")); @@ -212,10 +211,11 @@ TEST_F(DateTimeFunctionsTest, weekOfYear) { TEST_F(DateTimeFunctionsTest, unixDate) { const auto unixDate = [&](std::string_view date) { - return evaluateOnce( + return evaluateOnce( "unix_date(c0)", - {util::fromDateString(date.data(), date.length())}, - {DATE()}); + DATE(), + std::make_optional( + util::fromDateString(date.data(), date.length()))); }; EXPECT_EQ(unixDate("1970-01-01"), 0); @@ -334,7 +334,7 @@ TEST_F(DateTimeFunctionsTest, makeDate) { TEST_F(DateTimeFunctionsTest, lastDay) { const auto lastDayFunc = [&](const std::optional date) { - return evaluateOnce("last_day(c0)", {date}, {DATE()}); + return evaluateOnce("last_day(c0)", DATE(), date); }; const auto lastDay = [&](const std::string& dateStr) { @@ -525,7 +525,7 @@ TEST_F(DateTimeFunctionsTest, dateSubTinyint) { TEST_F(DateTimeFunctionsTest, dayOfYear) { const auto day = [&](std::optional date) { - return evaluateOnce("dayofyear(c0)", {date}, {DATE()}); + return evaluateOnce("dayofyear(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, day(std::nullopt)); EXPECT_EQ(100, day(parseDate("2016-04-09"))); @@ -535,7 +535,7 @@ TEST_F(DateTimeFunctionsTest, dayOfYear) { TEST_F(DateTimeFunctionsTest, dayOfMonth) { const auto day = [&](std::optional date) { - return evaluateOnce("dayofmonth(c0)", {date}, {DATE()}); + return evaluateOnce("dayofmonth(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, day(std::nullopt)); EXPECT_EQ(30, day(parseDate("2009-07-30"))); @@ -544,7 +544,7 @@ TEST_F(DateTimeFunctionsTest, dayOfMonth) { TEST_F(DateTimeFunctionsTest, dayOfWeekDate) { const auto dayOfWeek = [&](std::optional date) { - return evaluateOnce("dayofweek(c0)", {date}, {DATE()}); + return evaluateOnce("dayofweek(c0)", DATE(), date); }; EXPECT_EQ(std::nullopt, dayOfWeek(std::nullopt)); @@ -568,7 +568,7 @@ TEST_F(DateTimeFunctionsTest, dayOfWeekDate) { TEST_F(DateTimeFunctionsTest, weekdayDate) { const auto weekday = [&](std::optional value) { - return evaluateOnce("weekday(c0)", {value}, {DATE()}); + return evaluateOnce("weekday(c0)", DATE(), value); }; EXPECT_EQ(3, weekday(0)); @@ -593,8 +593,8 @@ TEST_F(DateTimeFunctionsTest, weekdayDate) { TEST_F(DateTimeFunctionsTest, dateDiffDate) { const auto dateDiff = [&](std::optional endDate, std::optional startDate) { - return evaluateOnce( - "datediff(c0, c1)", {endDate, startDate}, {DATE(), DATE()}); + return evaluateOnce( + "datediff(c0, c1)", {DATE(), DATE()}, endDate, startDate); }; // Simple tests. @@ -616,11 +616,13 @@ TEST_F(DateTimeFunctionsTest, dateDiffDate) { } TEST_F(DateTimeFunctionsTest, addMonths) { - const auto addMonths = [&](const std::string& dateString, int32_t value) { - return evaluateOnce( + const auto addMonths = [&](const std::string& dateString, + std::optional value) { + return evaluateOnce( "add_months(c0, c1)", - {parseDate(dateString), value}, - {DATE(), INTEGER()}); + {DATE(), INTEGER()}, + std::make_optional(parseDate(dateString)), + value); }; EXPECT_EQ(addMonths("2015-01-30", 1), parseDate("2015-02-28")); @@ -646,8 +648,8 @@ TEST_F(DateTimeFunctionsTest, addMonths) { TEST_F(DateTimeFunctionsTest, monthDate) { const auto month = [&](const std::string& dateString) { - return evaluateOnce( - "month(c0)", {parseDate(dateString)}, {DATE()}); + return evaluateOnce( + "month(c0)", DATE(), std::make_optional(parseDate(dateString))); }; EXPECT_EQ(4, month("2015-04-08")); @@ -658,8 +660,8 @@ TEST_F(DateTimeFunctionsTest, monthDate) { TEST_F(DateTimeFunctionsTest, quarterDate) { const auto quarter = [&](const std::string& dateString) { - return evaluateOnce( - "quarter(c0)", {parseDate(dateString)}, {DATE()}); + return evaluateOnce( + "quarter(c0)", DATE(), std::make_optional(parseDate(dateString))); }; EXPECT_EQ(2, quarter("2015-04-08")); @@ -917,23 +919,14 @@ TEST_F(DateTimeFunctionsTest, fromUnixtime) { TEST_F(DateTimeFunctionsTest, makeYMInterval) { const auto fromYearAndMonth = [&](const std::optional& year, - const std::optional& month) { - auto result = evaluateOnce( - "make_ym_interval(c0, c1)", - {year, month}, - {INTEGER(), INTEGER()}, - std::nullopt, - {INTERVAL_YEAR_MONTH()}); + const std::optional& month) { + auto result = + evaluateOnce("make_ym_interval(c0, c1)", year, month); VELOX_CHECK(result.has_value()); return INTERVAL_YEAR_MONTH()->valueToString(result.value()); }; const auto fromYear = [&](const std::optional& year) { - auto result = evaluateOnce( - "make_ym_interval(c0)", - {year}, - {INTEGER()}, - std::nullopt, - {INTERVAL_YEAR_MONTH()}); + auto result = evaluateOnce("make_ym_interval(c0)", year); VELOX_CHECK(result.has_value()); return INTERVAL_YEAR_MONTH()->valueToString(result.value()); }; @@ -972,7 +965,7 @@ TEST_F(DateTimeFunctionsTest, makeYMInterval) { TEST_F(DateTimeFunctionsTest, yearOfWeek) { const auto yearOfWeek = [&](std::optional date) { - return evaluateOnce("year_of_week(c0)", {date}, {DATE()}); + return evaluateOnce("year_of_week(c0)", DATE(), date); }; EXPECT_EQ(1970, yearOfWeek(0)); EXPECT_EQ(1970, yearOfWeek(-1)); @@ -987,8 +980,9 @@ TEST_F(DateTimeFunctionsTest, yearOfWeek) { TEST_F(DateTimeFunctionsTest, unixSeconds) { const auto unixSeconds = [&](const StringView time) { - return evaluateOnce( - "unix_seconds(c0)", util::fromTimestampString(time)); + return evaluateOnce( + "unix_seconds(c0)", + std::make_optional(util::fromTimestampString(time))); }; EXPECT_EQ(unixSeconds("1970-01-01 00:00:01"), 1); EXPECT_EQ(unixSeconds("1970-01-01 00:00:00.000127"), 0); @@ -998,8 +992,8 @@ TEST_F(DateTimeFunctionsTest, unixSeconds) { } TEST_F(DateTimeFunctionsTest, microsToTimestamp) { - const auto microsToTimestamp = [&](int64_t micros) { - return evaluateOnce("timestamp_micros(c0)", micros); + const auto microsToTimestamp = [&](std::optional micros) { + return evaluateOnce("timestamp_micros(c0)", micros); }; EXPECT_EQ( microsToTimestamp(1000000), diff --git a/velox/functions/sparksql/tests/LeastGreatestTest.cpp b/velox/functions/sparksql/tests/LeastGreatestTest.cpp index 729b8f767bea2..503447a9e26a5 100644 --- a/velox/functions/sparksql/tests/LeastGreatestTest.cpp +++ b/velox/functions/sparksql/tests/LeastGreatestTest.cpp @@ -27,8 +27,8 @@ class LeastTest : public SparkFunctionBaseTest { std::optional arg1, std::optional arg2, const TypePtr& type = CppToType::create()) { - return evaluateOnce( - "least(c0, c1, c2)", {arg0, arg1, arg2}, {type, type, type}); + return evaluateOnce( + "least(c0, c1, c2)", {type, type, type}, arg0, arg1, arg2); } template @@ -38,10 +38,13 @@ class LeastTest : public SparkFunctionBaseTest { std::optional arg2, std::optional arg3, const TypePtr& type = CppToType::create()) { - return evaluateOnce( + return evaluateOnce( "least(c0, c1, c2, c3)", - {arg0, arg1, arg2, arg3}, - {type, type, type, type}); + {type, type, type, type}, + arg0, + arg1, + arg2, + arg3); } template @@ -184,8 +187,8 @@ class GreatestTest : public SparkFunctionBaseTest { std::optional arg1, std::optional arg2, const TypePtr& type = CppToType::create()) { - return evaluateOnce( - "greatest(c0, c1, c2)", {arg0, arg1, arg2}, {type, type, type}); + return evaluateOnce( + "greatest(c0, c1, c2)", {type, type, type}, arg0, arg1, arg2); } template @@ -195,10 +198,13 @@ class GreatestTest : public SparkFunctionBaseTest { std::optional arg2, std::optional arg3, const TypePtr& type = CppToType::create()) { - return evaluateOnce( + return evaluateOnce( "greatest(c0, c1, c2, c3)", - {arg0, arg1, arg2, arg3}, - {type, type, type, type}); + {type, type, type, type}, + arg0, + arg1, + arg2, + arg3); } template diff --git a/velox/functions/sparksql/tests/MakeTimestampTest.cpp b/velox/functions/sparksql/tests/MakeTimestampTest.cpp index 9b63d42af20e2..13728236fef1f 100644 --- a/velox/functions/sparksql/tests/MakeTimestampTest.cpp +++ b/velox/functions/sparksql/tests/MakeTimestampTest.cpp @@ -115,19 +115,30 @@ TEST_F(MakeTimestampTest, errors) { auto result = evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data); facebook::velox::test::assertEqualVectors(expected, result); }; + std::optional one = 1; const auto testInvalidSeconds = [&](std::optional microsec) { - auto result = evaluateOnce( + auto result = evaluateOnce( "make_timestamp(c0, c1, c2, c3, c4, c5)", - {1, 1, 1, 1, 1, microsec}, - {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}); + {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, + one, + one, + one, + one, + one, + microsec); EXPECT_EQ(result, std::nullopt); }; - const auto testInvalidArguments = [&](int64_t microsec, + const auto testInvalidArguments = [&](std::optional microsec, const TypePtr& microsType) { - return evaluateOnce( + return evaluateOnce( "make_timestamp(c0, c1, c2, c3, c4, c5)", - {1, 1, 1, 1, 1, microsec}, - {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}); + {INTEGER(), INTEGER(), INTEGER(), INTEGER(), INTEGER(), microsType}, + one, + one, + one, + one, + one, + microsec); }; // Throw if no session time zone. diff --git a/velox/functions/sparksql/tests/StringTest.cpp b/velox/functions/sparksql/tests/StringTest.cpp index 60bf3516bf06c..b0e3b8419ae02 100644 --- a/velox/functions/sparksql/tests/StringTest.cpp +++ b/velox/functions/sparksql/tests/StringTest.cpp @@ -66,8 +66,7 @@ TEST_F(StringTest, bitLength) { TEST_F(StringTest, bitLengthVarbinary) { const auto bitLength = [&](const std::optional& arg) { - return evaluateOnce( - "bit_length(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("bit_length(c0)", VARBINARY(), arg); }; EXPECT_EQ(bitLength(""), 0); @@ -282,8 +281,7 @@ TEST_F(StringTest, lengthString) { TEST_F(StringTest, lengthVarbinary) { const auto length = [&](const std::optional& arg) { - return evaluateOnce( - "length(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("length(c0)", VARBINARY(), arg); }; EXPECT_EQ(length(""), 0); EXPECT_EQ(length(std::string("\0", 1)), 1); @@ -386,8 +384,7 @@ TEST_F(StringTest, ltrim) { TEST_F(StringTest, md5) { const auto md5 = [&](const std::optional& arg) { - return evaluateOnce( - "md5(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("md5(c0)", VARBINARY(), arg); }; EXPECT_EQ(md5(std::nullopt), std::nullopt); EXPECT_EQ(md5(""), "d41d8cd98f00b204e9800998ecf8427e"); @@ -570,8 +567,7 @@ TEST_F(StringTest, rtrim) { TEST_F(StringTest, sha1) { const auto sha1 = [&](const std::optional& arg) { - return evaluateOnce( - "sha1(c0)", {arg}, {VARBINARY()}); + return evaluateOnce("sha1(c0)", VARBINARY(), arg); }; EXPECT_EQ(sha1(std::nullopt), std::nullopt);