Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add json_array_contains Presto Json function #2299

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions velox/docs/functions/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ JSON Functions

SELECT json_array_length('[1, 2, 3]');

.. function:: json_array_contains(json, value) -> boolean

Determines whether ``value`` exists in ``json`` (a string containing a JSON
array). ``value`` can be a boolean, bigint, double, or varchar.
Returns NULL if ``json`` is not an array::

SELECT json_array_contains('[1, 2, 3]', 2);

============
JSON Vectors
============
Expand Down
35 changes: 35 additions & 0 deletions velox/functions/prestosql/JsonFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,39 @@ struct JsonArrayLengthFunction {
}
};

template <typename T>
struct JsonArrayContainsFunction {
pramodsatya marked this conversation as resolved.
Show resolved Hide resolved
VELOX_DEFINE_FUNCTION_TYPES(T);

template <typename TInput>
FOLLY_ALWAYS_INLINE bool
call(bool& result, const arg_type<Varchar>& json, const TInput& value) {
auto parsedJson = folly::parseJson(json);
if (!parsedJson.isArray()) {
return false;
}

result = false;
bool valueBool = std::is_same_v<TInput, bool>;
bool valueInt = std::is_same_v<TInput, int64_t>;
bool valueDouble = std::is_same_v<TInput, double>;
for (const auto& v : parsedJson) {
pramodsatya marked this conversation as resolved.
Show resolved Hide resolved
if (valueBool && v.isBool() && v == value) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::is_same_v<TInput, bool> can be determined at compile time, so it's better not to do runtime checks on it. (Same for int64_t and double.) With the current code, valueBool, valueInt, and valueDouble are computed and checked at runtime. Consider using if constexpr (...) for type checks and do the value comparison inside. The rest looks good to me.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks this is a good suggestion. Agree its best to handle this in compilation. Otherwise choosing to write the templated code while taking these runtime hits is not worth the tradeoff.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion, changed accordingly

result = true;
break;
} else if (valueInt && v.isInt() && v == value) {
result = true;
break;
} else if (valueDouble && v.isDouble() && v == value) {
result = true;
break;
} else if (v.isString() && v == value) {
result = true;
break;
}
}
return true;
}
};

} // namespace facebook::velox::functions
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ void registerJsonFunctions() {
{"json_extract_scalar"});
registerFunction<JsonArrayLengthFunction, int64_t, Varchar>(
{"json_array_length"});
registerFunction<JsonArrayContainsFunction, bool, Varchar, bool>(
{"json_array_contains"});
registerFunction<JsonArrayContainsFunction, bool, Varchar, int64_t>(
{"json_array_contains"});
registerFunction<JsonArrayContainsFunction, bool, Varchar, double>(
{"json_array_contains"});
registerFunction<JsonArrayContainsFunction, bool, Varchar, Varchar>(
{"json_array_contains"});
}

} // namespace facebook::velox::functions
183 changes: 183 additions & 0 deletions velox/functions/prestosql/tests/JsonFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ class JsonFunctionsTest : public functions::test::FunctionBaseTest {
/// Test helper: forwards `json` to evaluateOnce with the expression
/// "json_array_length(c0)" and returns its optional bigint result.
std::optional<int64_t> json_array_length(std::optional<std::string> json) {
  const auto length = evaluateOnce<int64_t>("json_array_length(c0)", json);
  return length;
}

/// Test helper: forwards `json` and `value` to evaluateOnce with the
/// expression "json_array_contains(c0, c1)" and returns its optional boolean
/// result. T is the C++ type of the value being searched for.
template <typename T>
std::optional<bool> json_array_contains(
    std::optional<std::string> json,
    std::optional<T> value) {
  const auto contains =
      evaluateOnce<bool>("json_array_contains(c0, c1)", json, value);
  return contains;
}
};

TEST_F(JsonFunctionsTest, isJsonScalar) {
Expand Down Expand Up @@ -67,6 +74,182 @@ TEST_F(JsonFunctionsTest, jsonArrayLength) {
EXPECT_EQ(json_array_length(R"({"k1":[0,1,2], "k2":"v1"})"), std::nullopt);
}

// Table-driven checks of json_array_contains with a boolean search value.
TEST_F(JsonFunctionsTest, jsonArrayContainsBool) {
  struct Case {
    const char* json;
    bool value;
    std::optional<bool> expected;
  };

  const Case cases[] = {
      // Arrays without a matching boolean element.
      {R"([])", true, false},
      {R"([1, 2, 3])", false, false},
      {R"([1.2, 2.3, 3.4])", true, false},
      {R"(["hello", "presto", "world"])", false, false},
      // Inputs that are not JSON arrays.
      {R"(1)", true, std::nullopt},
      {R"("thefoxjumpedoverthefence")", false, std::nullopt},
      {R"("")", false, std::nullopt},
      {R"(true)", true, std::nullopt},
      {R"({"k1":[0,1,2], "k2":"v1"})", true, std::nullopt},
      // Arrays holding booleans, alone or mixed with other types.
      {R"([true, false])", true, true},
      {R"([true, true])", false, false},
      {R"([123, 123.456, true, "abc"])", true, true},
      {R"([123, 123.456, true, "abc"])", false, false},
      {R"([false, false, false, false, false, false, false,
false, false, false, false, false, false, true, false, false, false, false])",
       true,
       true},
      {R"([true, true, true, true, true, true, true,
true, true, true, true, true, true, true, true, true, true, true])",
       false,
       false},
  };

  for (const auto& c : cases) {
    EXPECT_EQ(json_array_contains<bool>(c.json, c.value), c.expected)
        << c.json;
  }
}

// Table-driven checks of json_array_contains with a bigint search value.
TEST_F(JsonFunctionsTest, jsonArrayContainsInt) {
  struct Case {
    const char* json;
    int64_t value;
    std::optional<bool> expected;
  };

  const Case cases[] = {
      // Arrays without a matching integer element.
      {R"([])", 0, false},
      {R"([1.2, 2.3, 3.4])", 2, false},
      {R"([1.2, 2.0, 3.4])", 2, false},
      {R"(["hello", "presto", "world"])", 2, false},
      {R"([false, false, false])", 17, false},
      // Inputs that are not JSON arrays.
      {R"(1)", 1, std::nullopt},
      {R"("thefoxjumpedoverthefence")", 1, std::nullopt},
      {R"("")", 1, std::nullopt},
      {R"(true)", 1, std::nullopt},
      {R"({"k1":[0,1,2], "k2":"v1"})", 1, std::nullopt},
      // Arrays holding integers, alone or mixed with other types.
      {R"([1, 2, 3])", 1, true},
      {R"([1, 2, 3])", 4, false},
      {R"([123, 123.456, true, "abc"])", 123, true},
      {R"([123, 123.456, true, "abc"])", 456, false},
      {R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])",
       17,
       true},
      {R"([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])",
       23,
       false},
  };

  for (const auto& c : cases) {
    EXPECT_EQ(json_array_contains<int64_t>(c.json, c.value), c.expected)
        << c.json;
  }
}

// Table-driven checks of json_array_contains with a double search value.
TEST_F(JsonFunctionsTest, jsonArrayContainsDouble) {
  struct Case {
    const char* json;
    double value;
    std::optional<bool> expected;
  };

  const Case cases[] = {
      // Arrays without a matching double element.
      {R"([])", 2.3, false},
      {R"([1, 2, 3])", 2.3, false},
      {R"([1, 2, 3])", 2.0, false},
      {R"(["hello", "presto", "world"])", 2.3, false},
      {R"([false, false, false])", 2.3, false},
      // Inputs that are not JSON arrays.
      {R"(1)", 2.3, std::nullopt},
      {R"("thefoxjumpedoverthefence")", 2.3, std::nullopt},
      {R"("")", 2.3, std::nullopt},
      {R"(true)", 2.3, std::nullopt},
      {R"({"k1":[0,1,2], "k2":"v1"})", 2.3, std::nullopt},
      // Arrays holding doubles, alone or mixed with other types.
      {R"([1.2, 2.3, 3.4])", 2.3, true},
      {R"([1.2, 2.3, 3.4])", 2.4, false},
      {R"([123, 123.456, true, "abc"])", 123.456, true},
      {R"([123, 123.456, true, "abc"])", 456.789, false},
      {R"([1.2, 2.3, 3.4, 4.5, 1.2, 2.3, 3.4, 4.5, 1.2, 2.3, 3.4, 4.5, 1.2, 2.3, 3.4, 4.5, 1.2, 2.3, 3.4, 4.5])",
       4.5,
       true},
      {R"([1.2, 2.3, 3.4, 4.5, 1.2, 2.3, 3.4, 4.5, 1.2, 2.3, 3.4, 4.5, 1.2, 2.3, 3.4, 4.5, 1.2, 2.3, 3.4, 4.5])",
       4.3,
       false},
  };

  for (const auto& c : cases) {
    EXPECT_EQ(json_array_contains<double>(c.json, c.value), c.expected)
        << c.json;
  }
}

// Table-driven checks of json_array_contains with a varchar search value.
TEST_F(JsonFunctionsTest, jsonArrayContainsString) {
  struct Case {
    const char* json;
    const char* value;
    std::optional<bool> expected;
  };

  const Case cases[] = {
      // Arrays without a matching string element.
      {R"([])", "", false},
      {R"([1, 2, 3])", "1", false},
      {R"([1.2, 2.3, 3.4])", "2.3", false},
      {R"([true, false])", R"("true")", false},
      // Inputs that are not JSON arrays.
      {R"(1)", "1", std::nullopt},
      {R"("thefoxjumpedoverthefence")", "1", std::nullopt},
      {R"("")", "1", std::nullopt},
      {R"(true)", "1", std::nullopt},
      {R"({"k1":[0,1,2], "k2":"v1"})", "1", std::nullopt},
      // Arrays holding strings, alone or mixed with other types.
      {R"(["hello", "presto", "world"])", "presto", true},
      {R"(["hello", "presto", "world"])", "nation", false},
      {R"([123, 123.456, true, "abc"])", "abc", true},
      {R"([123, 123.456, true, "abc"])", "def", false},
      {R"(["hello", "presto", "world", "hello", "presto", "world", "hello", "presto", "world", "hello",
"presto", "world", "hello", "presto", "world", "hello", "presto", "world"])",
       "hello",
       true},
      {R"(["hello", "presto", "world", "hello", "presto", "world", "hello", "presto", "world", "hello",
"presto", "world", "hello", "presto", "world", "hello", "presto", "world"])",
       "hola",
       false},
      {R"(["hello", "presto", "world", 1, 2, 3, true, false, 1.2, 2.3, {"k1":[0,1,2], "k2":"v1"}])",
       "world",
       true},
      // Strings containing spaces.
      {R"(["the fox jumped over the fence", "hello presto world"])",
       "hello velox world",
       false},
      {R"(["the fox jumped over the fence", "hello presto world"])",
       "the fox jumped over the fence",
       true},
  };

  for (const auto& c : cases) {
    EXPECT_EQ(json_array_contains<std::string>(c.json, c.value), c.expected)
        << c.json;
  }
}

} // namespace

} // namespace facebook::velox::functions::prestosql