From ee6b1e3c12a16d91d56f2bc5b73df6ab98e659a5 Mon Sep 17 00:00:00 2001 From: Semyon Yentsov Date: Wed, 16 Oct 2024 13:50:58 +0000 Subject: [PATCH] convert large integers to double --- ydb/library/binary_json/ut_benchmark/ya.make | 4 ++ ydb/library/binary_json/write.cpp | 44 +++++++---------- .../yql/minikql/jsonpath/ut/common_ut.cpp | 47 ++++++++++++++++++- 3 files changed, 68 insertions(+), 27 deletions(-) diff --git a/ydb/library/binary_json/ut_benchmark/ya.make b/ydb/library/binary_json/ut_benchmark/ya.make index 7cf58e0abc16..626cc4e426a2 100644 --- a/ydb/library/binary_json/ut_benchmark/ya.make +++ b/ydb/library/binary_json/ut_benchmark/ya.make @@ -18,7 +18,11 @@ SRCS( PEERDIR( library/cpp/testing/unittest ydb/library/binary_json + ydb/library/yql/minikql/dom + ydb/library/yql/minikql/invoke_builtins/llvm14 ydb/library/yql/public/udf/service/exception_policy + ydb/library/yql/core/issue/protos + ydb/library/yql/sql/pg_dummy ) YQL_LAST_ABI_VERSION() diff --git a/ydb/library/binary_json/write.cpp b/ydb/library/binary_json/write.cpp index 98991c5116d4..15964d6d2c71 100644 --- a/ydb/library/binary_json/write.cpp +++ b/ydb/library/binary_json/write.cpp @@ -548,10 +548,9 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call } } -// unused, left for performance comparison template requires std::is_same_v || std::is_same_v -[[maybe_unused]] [[nodiscard]] simdjson::error_code SimdJsonToJsonIndexImpl(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) { +[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) { #define RETURN_IF_NOT_SUCCESS(error) \ if (Y_UNLIKELY(error != simdjson::SUCCESS)) { \ return error; \ @@ -591,7 +590,10 @@ template break; } case simdjson::fallback::number_type::big_integer: - return simdjson::NUMBER_OUT_OF_RANGE; + double v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnDouble(v); + break; } break; } @@ -605,7 +607,7 @@ template RETURN_IF_NOT_SUCCESS(value.get(v)); for (auto item : v) { RETURN_IF_NOT_SUCCESS(item.error()); - RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item.value_unsafe(), callbacks)); + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value_unsafe(), callbacks)); } callbacks.OnCloseArray(); @@ -622,7 +624,7 @@ template const auto key = keyValue.unescaped_key(); RETURN_IF_NOT_SUCCESS(key.error()); callbacks.OnMapKey(key.value_unsafe()); - RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(keyValue.value(), callbacks)); + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(keyValue.value(), callbacks)); } callbacks.OnCloseMap(); @@ -635,7 +637,8 @@ template #undef RETURN_IF_NOT_SUCCESS } -[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) { +// unused, left for performance comparison +[[maybe_unused]] [[nodiscard]] simdjson::error_code SimdJsonToJsonIndexImpl(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) { #define RETURN_IF_NOT_SUCCESS(status) \ if (Y_UNLIKELY(status != simdjson::SUCCESS)) { \ return status; \ @@ -681,7 +684,7 @@ template simdjson::dom::array v; RETURN_IF_NOT_SUCCESS(value.get(v)); for (const auto& item : v) { - RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item, callbacks)); + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item, callbacks)); } callbacks.OnCloseArray(); @@ -694,7 +697,7 @@ template RETURN_IF_NOT_SUCCESS(value.get(v)); for (const auto& item : v) { callbacks.OnMapKey(item.key); - RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value, callbacks)); + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item.value, callbacks)); } callbacks.OnCloseMap(); @@ -706,28 +709,17 @@ template } } -TMaybe SerializeToBinaryJsonImplRapidjson(const TStringBuf json) { - TMemoryInput input(json.data(), json.size()); - TBinaryJsonCallbacks callbacks(/* throwException */ false); - if (!ReadJson(&input, &callbacks)) { - return Nothing(); - } - TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); - return std::move(serializer).Serialize(); -} - TMaybe SerializeToBinaryJsonImpl(const TStringBuf json) { - thread_local simdjson::dom::parser parser; - auto doc = parser.parse(json); + thread_local simdjson::ondemand::parser parser; + const simdjson::padded_string paddedJson(json); + auto doc = parser.iterate(paddedJson); if (doc.error() != simdjson::SUCCESS) { - if (doc.error() == simdjson::BIGINT_ERROR) { - return SerializeToBinaryJsonImplRapidjson(json); - } - return Nothing(); + Y_ABORT_IF(doc.error() == simdjson::BIGINT_ERROR); + return false; } TBinaryJsonCallbacks callbacks(/* throwException */ false); - if (SimdJsonToJsonIndex(doc.value(), callbacks) != simdjson::SUCCESS) { - return Nothing(); + if (SimdJsonToJsonIndex(doc.value_unsafe(), callbacks) != simdjson::SUCCESS) { + return false; } TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); return std::move(serializer).Serialize(); diff --git a/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp b/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp index 087759f769b5..4a4ef0f8b6d2 100644 --- a/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp +++ b/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp @@ -339,7 +339,52 @@ class TJsonPathCommonTest : public TJsonPathTestBase { "array": [1, 2, 3, 4] })", "$.array[+$.range.from to +$.range.to]", {"2", "3"}}, {R"([1, 2, 3])", "-$[*]", {"-1", "-2", "-3"}}, - {"10000000000000000000000000", "-$", {"-9.999999999999999e+24"}}, + {"30000000000000000000000000", "-$", {"-3e+25"}}, + }; + + for (const auto& testCase : testCases) { + for (const auto mode : ALL_MODES) { + RunTestCase(testCase.Json, mode + testCase.JsonPath, testCase.Result); + } + } + } + + void TestBigint() { + const TVector testCases = { + {R"([])", "-3", {"-3"}}, + {R"([])", "+3", {"3"}}, + {R"(-1)", "-$", {"1"}}, + {R"(-1)", "+$", {"-1"}}, + {R"({ + "range": { + "from": -1, + "to": -2 + }, + "array": [1, 2, 3, 4] + })", "$.array[-$.range.from to -$.range.to]", {"2", "3"}}, + {R"({ + "range": { + "from": 1, + "to": -2 + }, + "array": [1, 2, 3, 4] + })", "$.array[+$.range.from to -$.range.to]", {"2", "3"}}, + {R"({ + "range": { + "from": -1, + "to": 2 + }, + "array": [1, 2, 3, 4] + })", "$.array[-$.range.from to +$.range.to]", {"2", "3"}}, + {R"({ + "range": { + "from": 1, + "to": 2 + }, + "array": [1, 2, 3, 4] + })", "$.array[+$.range.from to +$.range.to]", {"2", "3"}}, + {R"([1, 2, 3])", "-$[*]", {"-1", "-2", "-3"}}, + {"100000000000000000000000000", "-$", {"-1e+26"}}, }; for (const auto& testCase : testCases) {