Skip to content

Commit

Permalink
convert large integers to double
Browse files Browse the repository at this point in the history
  • Loading branch information
swalrus1 committed Oct 16, 2024
1 parent 3b3e4b4 commit ee6b1e3
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 27 deletions.
4 changes: 4 additions & 0 deletions ydb/library/binary_json/ut_benchmark/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ SRCS(
PEERDIR(
library/cpp/testing/unittest
ydb/library/binary_json
ydb/library/yql/minikql/dom
ydb/library/yql/minikql/invoke_builtins/llvm14
ydb/library/yql/public/udf/service/exception_policy
ydb/library/yql/core/issue/protos
ydb/library/yql/sql/pg_dummy
)

YQL_LAST_ABI_VERSION()
Expand Down
44 changes: 18 additions & 26 deletions ydb/library/binary_json/write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,10 +548,9 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call
}
}

// unused, left for performance comparison
template <typename TOnDemandValue>
requires std::is_same_v<TOnDemandValue, simdjson::ondemand::value> || std::is_same_v<TOnDemandValue, simdjson::ondemand::document>
[[maybe_unused]] [[nodiscard]] simdjson::error_code SimdJsonToJsonIndexImpl(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) {
[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) {
#define RETURN_IF_NOT_SUCCESS(error) \
if (Y_UNLIKELY(error != simdjson::SUCCESS)) { \
return error; \
Expand Down Expand Up @@ -591,7 +590,10 @@ template <typename TOnDemandValue>
break;
}
case simdjson::fallback::number_type::big_integer:
return simdjson::NUMBER_OUT_OF_RANGE;
double v;
RETURN_IF_NOT_SUCCESS(value.get(v));
callbacks.OnDouble(v);
break;
}
break;
}
Expand All @@ -605,7 +607,7 @@ template <typename TOnDemandValue>
RETURN_IF_NOT_SUCCESS(value.get(v));
for (auto item : v) {
RETURN_IF_NOT_SUCCESS(item.error());
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item.value_unsafe(), callbacks));
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value_unsafe(), callbacks));
}

callbacks.OnCloseArray();
Expand All @@ -622,7 +624,7 @@ template <typename TOnDemandValue>
const auto key = keyValue.unescaped_key();
RETURN_IF_NOT_SUCCESS(key.error());
callbacks.OnMapKey(key.value_unsafe());
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(keyValue.value(), callbacks));
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(keyValue.value(), callbacks));
}

callbacks.OnCloseMap();
Expand All @@ -635,7 +637,8 @@ template <typename TOnDemandValue>
#undef RETURN_IF_NOT_SUCCESS
}

[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) {
// unused, left for performance comparison
[[maybe_unused]] [[nodiscard]] simdjson::error_code SimdJsonToJsonIndexImpl(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) {
#define RETURN_IF_NOT_SUCCESS(status) \
if (Y_UNLIKELY(status != simdjson::SUCCESS)) { \
return status; \
Expand Down Expand Up @@ -681,7 +684,7 @@ template <typename TOnDemandValue>
simdjson::dom::array v;
RETURN_IF_NOT_SUCCESS(value.get(v));
for (const auto& item : v) {
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item, callbacks));
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item, callbacks));
}

callbacks.OnCloseArray();
Expand All @@ -694,7 +697,7 @@ template <typename TOnDemandValue>
RETURN_IF_NOT_SUCCESS(value.get(v));
for (const auto& item : v) {
callbacks.OnMapKey(item.key);
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value, callbacks));
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item.value, callbacks));
}

callbacks.OnCloseMap();
Expand All @@ -706,28 +709,17 @@ template <typename TOnDemandValue>
}
}

// Serializes a textual JSON document into binary JSON using the stream-based
// ReadJson reader (presumably RapidJSON-backed, judging by the function name —
// confirm against the ReadJson implementation).
//
// The callbacks object is created with throwException = false, and a failed
// parse is reported to the caller as Nothing(). On success the structure
// accumulated by the callbacks is handed to TBinaryJsonSerializer, whose
// serialized output is returned.
TMaybe<TBinaryJson> SerializeToBinaryJsonImplRapidjson(const TStringBuf json) {
    TMemoryInput jsonStream(json.data(), json.size());
    TBinaryJsonCallbacks handler(/* throwException */ false);

    const bool parsed = ReadJson(&jsonStream, &handler);
    if (!parsed) {
        return Nothing();
    }

    // Both the callbacks result and the serializer are consumed as rvalues,
    // exactly as in the std::move-based form.
    return TBinaryJsonSerializer(std::move(handler).GetResult()).Serialize();
}

TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) {
thread_local simdjson::dom::parser parser;
auto doc = parser.parse(json);
thread_local simdjson::ondemand::parser parser;
const simdjson::padded_string paddedJson(json);
auto doc = parser.iterate(paddedJson);
if (doc.error() != simdjson::SUCCESS) {
if (doc.error() == simdjson::BIGINT_ERROR) {
return SerializeToBinaryJsonImplRapidjson(json);
}
return Nothing();
Y_ABORT_IF(doc.error() == simdjson::BIGINT_ERROR);
return false;
}
TBinaryJsonCallbacks callbacks(/* throwException */ false);
if (SimdJsonToJsonIndex(doc.value(), callbacks) != simdjson::SUCCESS) {
return Nothing();
if (SimdJsonToJsonIndex(doc.value_unsafe(), callbacks) != simdjson::SUCCESS) {
return false;
}
TBinaryJsonSerializer serializer(std::move(callbacks).GetResult());
return std::move(serializer).Serialize();
Expand Down
47 changes: 46 additions & 1 deletion ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,52 @@ class TJsonPathCommonTest : public TJsonPathTestBase {
"array": [1, 2, 3, 4]
})", "$.array[+$.range.from to +$.range.to]", {"2", "3"}},
{R"([1, 2, 3])", "-$[*]", {"-1", "-2", "-3"}},
{"10000000000000000000000000", "-$", {"-9.999999999999999e+24"}},
{"30000000000000000000000000", "-$", {"-3e+25"}},
};

for (const auto& testCase : testCases) {
for (const auto mode : ALL_MODES) {
RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result);
}
}
}

void TestBigint() {
const TVector<TMultiOutputTestCase> testCases = {
{R"([])", "-3", {"-3"}},
{R"([])", "+3", {"3"}},
{R"(-1)", "-$", {"1"}},
{R"(-1)", "+$", {"-1"}},
{R"({
"range": {
"from": -1,
"to": -2
},
"array": [1, 2, 3, 4]
})", "$.array[-$.range.from to -$.range.to]", {"2", "3"}},
{R"({
"range": {
"from": 1,
"to": -2
},
"array": [1, 2, 3, 4]
})", "$.array[+$.range.from to -$.range.to]", {"2", "3"}},
{R"({
"range": {
"from": -1,
"to": 2
},
"array": [1, 2, 3, 4]
})", "$.array[-$.range.from to +$.range.to]", {"2", "3"}},
{R"({
"range": {
"from": 1,
"to": 2
},
"array": [1, 2, 3, 4]
})", "$.array[+$.range.from to +$.range.to]", {"2", "3"}},
{R"([1, 2, 3])", "-$[*]", {"-1", "-2", "-3"}},
{"100000000000000000000000000", "-$", {"-1e+26"}},
};

for (const auto& testCase : testCases) {
Expand Down

0 comments on commit ee6b1e3

Please sign in to comment.