Skip to content

Commit

Permalink
fix: UserErrors leave json_parse in an invalid state (#12308)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #12308

When json_parse throws a user error and it's not wrapped in a try, if that exception is caught
outside expression evaluation and the ExprSet attempts to be reused, the json_parse function
may be in an invalid state and will throw a runtime exception.

This was exposed in ExpressionFuzzer after
#12281 was landed.

This fix is to clear the state whenever we see a UserException after calling parse whether or not
the function is wrapped in a try.

Reviewed By: kgpai

Differential Revision: D69473523

fbshipit-source-id: d4ae34d65136ce80138f9ed2e7f6f86bf566011e
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Feb 11, 2025
1 parent 5568518 commit b71648f
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 8 deletions.
7 changes: 5 additions & 2 deletions velox/functions/prestosql/JsonFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,12 +171,15 @@ class JsonParseFunction : public exec::VectorFunction {

if (auto error = parse(size, needNormalize)) {
context.setErrors(rows, errors_[error]);
clearState();
return;
}
auto* output = buffer->asMutable<char>();
auto outputSize = concatViews(views_, output);
rawStringViews[0] = StringView(output, outputSize);
} catch (const VeloxException& e) {
clearState();

if (!e.isUserError()) {
throw;
}
Expand Down Expand Up @@ -256,6 +259,8 @@ class JsonParseFunction : public exec::VectorFunction {
output += outputSize;
}
} catch (const VeloxException& e) {
clearState();

if (!e.isUserError()) {
throw;
}
Expand All @@ -264,8 +269,6 @@ class JsonParseFunction : public exec::VectorFunction {

FB_LOG_EVERY_MS(WARNING, 1000)
<< "Caught user error in json_parse: " << e.message();

clearState();
}
});

Expand Down
66 changes: 60 additions & 6 deletions velox/functions/prestosql/tests/JsonFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,17 +376,17 @@ TEST_F(JsonFunctionsTest, jsonParse) {

// Test try with invalid escape sequence.
{
auto data = makeRowVector({makeFlatVector<StringView>({
data = makeRowVector({makeFlatVector<StringView>({
// The logic for sorting keys checks the validity of escape sequences
// and will throw a user error in this case.
"{\"k\\i\":\"abc\",\"k2\":\"xyz\"}", // invalid json
// Add a second value to ensure the state is cleared.
R"([{"k1": "v1" }, {"k2": "v2" }])" // valid json
})});

auto result = evaluate("try(json_parse(c0))", data);
result = evaluate("try(json_parse(c0))", data);

auto expected = makeNullableFlatVector<StringView>(
expected = makeNullableFlatVector<StringView>(
{std::nullopt, R"([{"k1":"v1"},{"k2":"v2"}])"}, JSON());

velox::test::assertEqualVectors(expected, result);
Expand All @@ -395,16 +395,70 @@ TEST_F(JsonFunctionsTest, jsonParse) {
// Test try with invalid escape sequence going through fast path for
// constants.
{
auto data =
data =
makeRowVector({makeConstant("{\"k\\i\":\"abc\",\"k2\":\"xyz\"}", 3)});

auto result = evaluate("try(json_parse(c0))", data);
result = evaluate("try(json_parse(c0))", data);

auto expected = makeNullableFlatVector<StringView>(
expected = makeNullableFlatVector<StringView>(
{std::nullopt, std::nullopt, std::nullopt}, JSON());

velox::test::assertEqualVectors(expected, result);
}

// Test reusing ExprSet after user exception thrown.
{
// A batch with an invalid value that will throw a user error.
auto data1 = makeRowVector(
{makeConstant(R"({\"k\\i\":\"abc\",\"k2\":\"xyz\u4FE\"})", 3)});

// A batch with valid values.
auto data2 = makeRowVector({makeFlatVector<StringView>(
{R"([{"k1": "v1" }, {"k2": "v2" }])",
R"([{"k3": "v3" }, {"k4": "v4" }])"})});

auto typedExpr = makeTypedExpr("json_parse(c0)", asRowType(data1->type()));
exec::ExprSet exprSet({typedExpr}, &execCtx_);

VELOX_ASSERT_USER_THROW(evaluate(exprSet, data1), "Invalid escape digit");

expected = makeNullableFlatVector<StringView>(
{R"([{"k1":"v1"},{"k2":"v2"}])", R"([{"k3":"v3"},{"k4":"v4"}])"},
JSON());

result = evaluate(exprSet, data2);
velox::test::assertEqualVectors(expected, result);
}

// Test reusing ExprSet after user exception thrown in fast path for
// constants.
{
// A batch with an invalid value that will throw a user error.
auto data1 = makeRowVector({makeFlatVector<StringView>({
// The logic for sorting keys checks the validity of escape sequences
// and will throw a user error if unicode sequences are invalid.
R"({"k\\i":"abc","k2":"xyz\u4FE"})", // invalid json
// Add a second value to ensure the state is cleared.
R"([{"k1": "v1" }, {"k2": "v2" }])" // valid json
})});

// A batch with valid values.
auto data2 = makeRowVector({makeFlatVector<StringView>(
{R"([{"k1": "v1" }, {"k2": "v2" }])",
R"([{"k3": "v3" }, {"k4": "v4" }])"})});

auto typedExpr = makeTypedExpr("json_parse(c0)", asRowType(data1->type()));
exec::ExprSet exprSet({typedExpr}, &execCtx_);

VELOX_ASSERT_USER_THROW(evaluate(exprSet, data1), "Invalid escape digit");

expected = makeNullableFlatVector<StringView>(
{R"([{"k1":"v1"},{"k2":"v2"}])", R"([{"k3":"v3"},{"k4":"v4"}])"},
JSON());

result = evaluate(exprSet, data2);
velox::test::assertEqualVectors(expected, result);
}
}

TEST_F(JsonFunctionsTest, canonicalization) {
Expand Down

0 comments on commit b71648f

Please sign in to comment.