From 25311e4bb54c703664e3505f85f424269e2e0487 Mon Sep 17 00:00:00 2001 From: Semyon Date: Mon, 21 Oct 2024 19:52:06 +0300 Subject: [PATCH] Use simdjson for binary json construction for improved performance (#10464) --- .github/config/muted_ya.txt | 1 + .../formats/arrow/accessor/abstract/ya.make | 1 + ydb/core/formats/arrow/converter.cpp | 9 +- ydb/core/formats/arrow/ut/ut_arrow.cpp | 2 +- ydb/core/io_formats/cell_maker/cell_maker.cpp | 13 +- ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp | 2 +- .../engines/scheme/defaults/common/ya.make | 1 + ydb/core/tx/data_events/common/ya.make | 1 + .../tx/schemeshard/ut_restore/ut_restore.cpp | 2 +- ydb/core/ydb_convert/ydb_convert.cpp | 4 +- .../binary_json/ut_benchmark/write.cpp | 50 ++++ ydb/library/binary_json/ut_benchmark/ya.make | 30 +++ ydb/library/binary_json/write.cpp | 240 +++++++++++++++--- ydb/library/binary_json/write.h | 3 +- ydb/library/binary_json/ya.make | 4 + ydb/library/conclusion/generic/result.h | 112 ++++++++ ydb/library/conclusion/generic/status.h | 94 +++++++ ydb/library/conclusion/generic/ya.make | 9 + ydb/library/conclusion/result.h | 110 +------- ydb/library/conclusion/status.cpp | 9 - ydb/library/conclusion/status.h | 142 +---------- ydb/library/conclusion/ya.make | 4 +- .../formats/arrow/accessor/abstract/ya.make | 1 + ydb/library/mkql_proto/mkql_proto.cpp | 2 +- .../invoke_builtins/mkql_builtins_convert.cpp | 2 +- .../yql/minikql/invoke_builtins/ya.make | 3 +- .../yql/minikql/jsonpath/ut/common_ut.cpp | 2 +- ydb/library/yql/minikql/mkql_type_ops.cpp | 2 +- .../yt/codec/codegen/yt_codec_cg.cpp | 2 +- .../metadata/request/request_actor_cb.h | 17 +- 30 files changed, 557 insertions(+), 317 deletions(-) create mode 100644 ydb/library/binary_json/ut_benchmark/write.cpp create mode 100644 ydb/library/binary_json/ut_benchmark/ya.make create mode 100644 ydb/library/conclusion/generic/result.h create mode 100644 ydb/library/conclusion/generic/status.h create mode 100644 ydb/library/conclusion/generic/ya.make diff --git a/.github/config/muted_ya.txt b/.github/config/muted_ya.txt index e40e3fe4c336..81930f3bb0e8 100644 --- a/.github/config/muted_ya.txt +++ b/.github/config/muted_ya.txt @@ -42,6 +42,7 @@ ydb/core/tx/schemeshard/ut_move_reboots TSchemeShardMoveRebootsTest.WithDataAndP ydb/core/tx/schemeshard/ut_pq_reboots TPqGroupTestReboots.AlterWithReboots-PQConfigTransactionsAtSchemeShard-false ydb/core/tx/schemeshard/ut_restore TImportTests.ShouldSucceedOnManyTables ydb/core/tx/schemeshard/ut_split_merge TSchemeShardSplitBySizeTest.Merge1KShards +ydb/core/tx/tiering/ut ColumnShardTiers.TTLUsage ydb/core/tx/tx_proxy/ut_ext_tenant TExtSubDomainTest.CreateTableInsideAndAlterDomainAndTable-AlterDatabaseCreateHiveFirst* ydb/core/tx/tx_proxy/ut_storage_tenant TStorageTenantTest.RemoveStoragePoolBeforeDroppingTablet ydb/core/util/ut TCircularOperationQueueTest.ShouldShuffle diff --git a/ydb/core/formats/arrow/accessor/abstract/ya.make b/ydb/core/formats/arrow/accessor/abstract/ya.make index c40f1f297c18..4e07801b2285 100644 --- a/ydb/core/formats/arrow/accessor/abstract/ya.make +++ b/ydb/core/formats/arrow/accessor/abstract/ya.make @@ -4,6 +4,7 @@ PEERDIR( contrib/libs/apache/arrow ydb/library/conclusion ydb/services/metadata/abstract + ydb/library/actors/core ydb/library/formats/arrow/accessor/abstract ydb/library/formats/arrow/accessor/common ydb/library/formats/arrow/protos diff --git a/ydb/core/formats/arrow/converter.cpp b/ydb/core/formats/arrow/converter.cpp index 08fd2b6d6c6d..e9b2320d5088 100644 --- a/ydb/core/formats/arrow/converter.cpp +++ b/ydb/core/formats/arrow/converter.cpp @@ -31,8 +31,8 @@ static bool ConvertData(TCell& cell, const NScheme::TTypeInfo& colType, TMemoryP } case NScheme::NTypeIds::JsonDocument: { const auto binaryJson = NBinaryJson::SerializeToBinaryJson(cell.AsBuf()); - if (!binaryJson.Defined()) { - errorMessage = "Invalid JSON for JsonDocument provided"; + if (binaryJson.IsFail()) { + errorMessage = "Invalid JSON for JsonDocument provided: " + binaryJson.GetErrorMessage(); return false; } const auto saved = memPool.AppendString(TStringBuf(binaryJson->Data(), binaryJson->Size())); @@ -98,8 +98,9 @@ static arrow::Status ConvertColumn(const NScheme::TTypeInfo colType, std::shared } } else { const auto binaryJson = NBinaryJson::SerializeToBinaryJson(valueBuf); - if (!binaryJson.Defined()) { - return arrow::Status::SerializationError("Cannot serialize json: ", valueBuf); + if (binaryJson.IsFail()) { + return arrow::Status::SerializationError( + "Cannot serialize json (", binaryJson.GetErrorMessage(), "): ", valueBuf.SubStr(0, Min(valueBuf.Size(), 1024ul))); } auto appendResult = builder.Append(binaryJson->Data(), binaryJson->Size()); if (!appendResult.ok()) { diff --git a/ydb/core/formats/arrow/ut/ut_arrow.cpp b/ydb/core/formats/arrow/ut/ut_arrow.cpp index c85cd87c604a..e02d810cb70c 100644 --- a/ydb/core/formats/arrow/ut/ut_arrow.cpp +++ b/ydb/core/formats/arrow/ut/ut_arrow.cpp @@ -223,7 +223,7 @@ struct TDataRow { std::vector cells(value.Cells().data(), value.Cells().data() + value.Cells().size()); auto binaryJson = NBinaryJson::SerializeToBinaryJson(TStringBuf(JsonDocument.data(), JsonDocument.size())); - UNIT_ASSERT(binaryJson.Defined()); + UNIT_ASSERT(binaryJson.IsSuccess()); cells[19] = TCell(binaryJson->Data(), binaryJson->Size()); return TOwnedCellVec(cells); diff --git a/ydb/core/io_formats/cell_maker/cell_maker.cpp b/ydb/core/io_formats/cell_maker/cell_maker.cpp index b5825a6322e5..f50eda625f7e 100644 --- a/ydb/core/io_formats/cell_maker/cell_maker.cpp +++ b/ydb/core/io_formats/cell_maker/cell_maker.cpp @@ -87,8 +87,13 @@ namespace { return false; } - result = NBinaryJson::SerializeToBinaryJson(unescaped); - return result.Defined(); + auto serializedJson = NBinaryJson::SerializeToBinaryJson(unescaped); + if (serializedJson.IsFail()) { + return false; + } + + result = serializedJson.DetachResult(); + return true; } template <> @@ -395,8 +400,8 @@ bool MakeCell(TCell& cell, const NJson::TJsonValue& value, const NScheme::TTypeI case NScheme::NTypeIds::Json: return TCellMaker::MakeDirect(cell, NFormats::WriteJson(value), pool, err); case NScheme::NTypeIds::JsonDocument: - if (const auto& result = NBinaryJson::SerializeToBinaryJson(NFormats::WriteJson(value))) { - return TCellMaker, TStringBuf>::MakeDirect(cell, result, pool, err, &BinaryJsonToStringBuf); + if (auto result = NBinaryJson::SerializeToBinaryJson(NFormats::WriteJson(value)); result.IsSuccess()) { + return TCellMaker, TStringBuf>::MakeDirect(cell, result.DetachResult(), pool, err, &BinaryJsonToStringBuf); } else { return false; } diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index e5529d5a450e..93537b68401b 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -8920,7 +8920,7 @@ Y_UNIT_TEST_SUITE(KqpOlapTypes) { testHelper.CreateTable(testTable); std::string jsonString = R"({"col1": "val1", "obj": {"obj_col2_int": 16}})"; auto maybeJsonDoc = NBinaryJson::SerializeToBinaryJson(jsonString); - Y_ABORT_UNLESS(maybeJsonDoc.Defined()); + Y_ABORT_UNLESS(maybeJsonDoc.IsSuccess()); const std::string jsonBin(maybeJsonDoc->Data(), maybeJsonDoc->Size()); { TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema)); diff --git a/ydb/core/tx/columnshard/engines/scheme/defaults/common/ya.make b/ydb/core/tx/columnshard/engines/scheme/defaults/common/ya.make index a34b917e4df3..216ae4e37917 100644 --- a/ydb/core/tx/columnshard/engines/scheme/defaults/common/ya.make +++ b/ydb/core/tx/columnshard/engines/scheme/defaults/common/ya.make @@ -9,6 +9,7 @@ PEERDIR( contrib/libs/apache/arrow ydb/library/conclusion ydb/core/scheme_types + ydb/library/actors/core ) END() diff --git a/ydb/core/tx/data_events/common/ya.make b/ydb/core/tx/data_events/common/ya.make index 5d60d3500c3d..33e4947a1a3f 100644 --- a/ydb/core/tx/data_events/common/ya.make +++ b/ydb/core/tx/data_events/common/ya.make @@ -2,6 +2,7 @@ LIBRARY() PEERDIR( ydb/core/protos + ydb/library/conclusion ydb/library/yql/core/issue/protos ydb/public/api/protos ) diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp index 235f76209bf0..aee743b5c551 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp +++ b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp @@ -1196,7 +1196,7 @@ value { const TString string = "test string"; const TString json = R"({"key": "value"})"; auto binaryJson = NBinaryJson::SerializeToBinaryJson(json); - Y_ABORT_UNLESS(binaryJson.Defined()); + Y_ABORT_UNLESS(binaryJson.IsSuccess()); const std::pair decimal = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("16.17", NScheme::DECIMAL_PRECISION, NScheme::DECIMAL_SCALE)); const std::pair decimal35 = NYql::NDecimal::MakePair(NYql::NDecimal::FromString("555555555555555.123456789", 35, 10)); diff --git a/ydb/core/ydb_convert/ydb_convert.cpp b/ydb/core/ydb_convert/ydb_convert.cpp index 8ec5aa9c2f32..ff782f552e5d 100644 --- a/ydb/core/ydb_convert/ydb_convert.cpp +++ b/ydb/core/ydb_convert/ydb_convert.cpp @@ -512,7 +512,7 @@ Y_FORCE_INLINE void ConvertData(NUdf::TDataTypeId typeId, const Ydb::Value& valu case NUdf::TDataType::Id: { CheckTypeId(value.value_case(), Ydb::Value::kTextValue, "JsonDocument"); const auto binaryJson = NBinaryJson::SerializeToBinaryJson(value.text_value()); - if (!binaryJson.Defined()) { + if (binaryJson.IsFail()) { throw yexception() << "Invalid JsonDocument value"; } res.SetBytes(binaryJson->Data(), binaryJson->Size()); @@ -1238,7 +1238,7 @@ bool CellFromProtoVal(const NScheme::TTypeInfo& type, i32 typmod, const Ydb::Val } case NScheme::NTypeIds::JsonDocument : { const auto binaryJson = NBinaryJson::SerializeToBinaryJson(val.Gettext_value()); - if (!binaryJson.Defined()) { + if (binaryJson.IsFail()) { err = "Invalid JSON for JsonDocument provided"; return false; } diff --git a/ydb/library/binary_json/ut_benchmark/write.cpp b/ydb/library/binary_json/ut_benchmark/write.cpp new file mode 100644 index 000000000000..8c63fbbfdacc --- /dev/null +++ b/ydb/library/binary_json/ut_benchmark/write.cpp @@ -0,0 +1,50 @@ +#include + +#include +#include +#include +#include + +#include + +// ya test -r -D BENCHMARK_MAKE_LARGE_PART +#ifndef BENCHMARK_MAKE_LARGE_PART +#define BENCHMARK_MAKE_LARGE_PART 0 +#endif + +using namespace NKikimr::NBinaryJson; + +namespace { + +static ui64 seed = 0; + +NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) { + NJson::TJsonValue value; + if (depth == 1) { + value.SetValue(NUnitTest::RandomString(10, seed++)); + return value; + } + for (ui64 i = 0; i < nChildren; ++i) { + value.InsertValue(NUnitTest::RandomString(10, seed++), GetTestJson(depth - 1)); + } + return value; +} + +TString GetTestJsonString() { + seed = 42; + return NJson::WriteJson(GetTestJson(3, 50)); +} + +static void BenchWriteSimdJson(benchmark::State& state) { + TString value = GetTestJsonString(); + TStringBuf buf(value); + for (auto _ : state) { + auto result = SerializeToBinaryJson(buf); + benchmark::DoNotOptimize(result); + benchmark::ClobberMemory(); + } +} + +} + +BENCHMARK(BenchWriteSimdJson)->MinTime(1); diff --git a/ydb/library/binary_json/ut_benchmark/ya.make b/ydb/library/binary_json/ut_benchmark/ya.make new file mode 100644 index 000000000000..626cc4e426a2 --- /dev/null +++ b/ydb/library/binary_json/ut_benchmark/ya.make @@ -0,0 +1,30 @@ +G_BENCHMARK() + +TAG(ya:fat) +SIZE(LARGE) +TIMEOUT(600) + +IF (BENCHMARK_MAKE_LARGE_PART) + CFLAGS( + -DBENCHMARK_MAKE_LARGE_PART=1 + ) + TIMEOUT(1200) +ENDIF() + +SRCS( + write.cpp +) + +PEERDIR( + library/cpp/testing/unittest + ydb/library/binary_json + ydb/library/yql/minikql/dom + ydb/library/yql/minikql/invoke_builtins/llvm14 + ydb/library/yql/public/udf/service/exception_policy + ydb/library/yql/core/issue/protos + ydb/library/yql/sql/pg_dummy +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/library/binary_json/write.cpp b/ydb/library/binary_json/write.cpp index 88f6338797c0..7bafea612fac 100644 --- a/ydb/library/binary_json/write.cpp +++ b/ydb/library/binary_json/write.cpp @@ -1,12 +1,18 @@ #include "write.h" +#include +#include +#include +#include +#include +#include +#include #include - -#include -#include -#include #include #include +#include +#include +#include #include @@ -71,41 +77,32 @@ struct TContainer { * container index instead. This is exactly how containers are stored in serialized BinaryJson (but with offsets instead of indices) */ struct TJsonIndex { - ui32 InternKey(const TStringBuf value) { + ui32 InternKey(const TStringBuf& value) { TotalKeysCount++; - const auto it = Keys.find(value); - if (it == Keys.end()) { - const ui32 currentIndex = LastFreeStringIndex++; - Keys[TString(value)] = currentIndex; + const auto [it, emplaced] = Keys.emplace(value, LastFreeStringIndex); + if (emplaced) { + ++LastFreeStringIndex; TotalKeyLength += value.length() + 1; - return currentIndex; - } else { - return it->second; } + return it->second; } - ui32 InternString(const TStringBuf value) { - const auto it = Strings.find(value); - if (it == Strings.end()) { - const ui32 currentIndex = LastFreeStringIndex++; - Strings[value] = currentIndex; + ui32 InternString(const TStringBuf& value) { + const auto [it, emplaced] = Strings.emplace(value, LastFreeStringIndex); + if (emplaced) { + ++LastFreeStringIndex; TotalStringLength += value.length() + 1; - return currentIndex; - } else { - return it->second; } + return it->second; } ui32 InternNumber(double value) { - const auto it = Numbers.find(value); - if (it == Numbers.end()) { - const ui32 currentIndex = LastFreeNumberIndex++; - Numbers[value] = currentIndex; - return currentIndex; - } else { - return it->second; + const auto [it, emplaced] = Numbers.emplace(value, LastFreeNumberIndex); + if (emplaced) { + ++LastFreeNumberIndex; } + return it->second; } void AddContainer(EContainerType type) { @@ -133,15 +130,15 @@ struct TJsonIndex { TStack ContainerIndex; TVector Containers; - TMap Keys; + TMap Keys; ui32 TotalKeyLength = 0; ui32 TotalKeysCount = 0; - THashMap Strings; + absl::flat_hash_map Strings; ui32 LastFreeStringIndex = 0; ui32 TotalStringLength = 0; - THashMap Numbers; + absl::flat_hash_map Numbers; ui32 LastFreeNumberIndex = 0; ui32 TotalEntriesCount = 0; @@ -551,20 +548,191 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call } } +template + requires std::is_same_v || std::is_same_v +[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) { +#define RETURN_IF_NOT_SUCCESS(expr) \ + if (const auto& status = expr; Y_UNLIKELY(status != simdjson::SUCCESS)) { \ + return status; \ + } + + switch (value.type()) { + case simdjson::ondemand::json_type::string: { + std::string_view v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnString(v); + break; + } + case simdjson::ondemand::json_type::boolean: { + bool v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnBoolean(v); + break; + } + case simdjson::ondemand::json_type::number: { + switch (value.get_number_type()) { + case simdjson::fallback::number_type::floating_point_number: { + double v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnDouble(v); + break; + } + case simdjson::fallback::number_type::signed_integer: { + i64 v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnInteger(v); + break; + } + case simdjson::fallback::number_type::unsigned_integer: { + ui64 v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnUInteger(v); + break; + } + case simdjson::fallback::number_type::big_integer: + double v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnDouble(v); + break; + } + break; + } + case simdjson::ondemand::json_type::null: { + auto is_null = value.is_null(); + RETURN_IF_NOT_SUCCESS(is_null.error()); + Y_ABORT_UNLESS(is_null.value_unsafe()); + callbacks.OnNull(); + break; + } + case simdjson::ondemand::json_type::array: { + callbacks.OnOpenArray(); + + simdjson::ondemand::array v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + for (auto item : v) { + RETURN_IF_NOT_SUCCESS(item.error()); + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value_unsafe(), callbacks)); + } + + callbacks.OnCloseArray(); + break; + } + case simdjson::ondemand::json_type::object: { + callbacks.OnOpenMap(); + + simdjson::ondemand::object v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + for (auto item : v) { + RETURN_IF_NOT_SUCCESS(item.error()); + auto& keyValue = item.value_unsafe(); + const auto key = keyValue.unescaped_key(); + RETURN_IF_NOT_SUCCESS(key.error()); + callbacks.OnMapKey(key.value_unsafe()); + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(keyValue.value(), callbacks)); + } + + callbacks.OnCloseMap(); + break; + } + } + + return simdjson::SUCCESS; + +#undef RETURN_IF_NOT_SUCCESS +} + +// unused, left for performance comparison +[[maybe_unused]] [[nodiscard]] simdjson::error_code SimdJsonToJsonIndexImpl(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) { +#define RETURN_IF_NOT_SUCCESS(status) \ + if (Y_UNLIKELY(status != simdjson::SUCCESS)) { \ + return status; \ + } + + switch (value.type()) { + case simdjson::dom::element_type::STRING: { + std::string_view v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnString(v); + break; + } + case simdjson::dom::element_type::BOOL: { + bool v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnBoolean(v); + break; + } + case simdjson::dom::element_type::INT64: { + i64 v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnInteger(v); + break; + } + case simdjson::dom::element_type::UINT64: { + ui64 v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnUInteger(v); + break; + } + case simdjson::dom::element_type::DOUBLE: { + double v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + callbacks.OnDouble(v); + break; + } + case simdjson::dom::element_type::NULL_VALUE: + callbacks.OnNull(); + break; + case simdjson::dom::element_type::ARRAY: { + callbacks.OnOpenArray(); + + simdjson::dom::array v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + for (const auto& item : v) { + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item, callbacks)); + } + + callbacks.OnCloseArray(); + break; + } + case simdjson::dom::element_type::OBJECT: { + callbacks.OnOpenMap(); + + simdjson::dom::object v; + RETURN_IF_NOT_SUCCESS(value.get(v)); + for (const auto& item : v) { + callbacks.OnMapKey(item.key); + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item.value, callbacks)); + } + + callbacks.OnCloseMap(); + break; + } + } + return simdjson::SUCCESS; +#undef RETURN_IF_NOT_SUCCESS +} } -TMaybe SerializeToBinaryJsonImpl(const TStringBuf json) { - TMemoryInput input(json.data(), json.size()); +TConclusion SerializeToBinaryJsonImpl(const TStringBuf json) { TBinaryJsonCallbacks callbacks(/* throwException */ false); - if (!ReadJson(&input, &callbacks)) { - return Nothing(); + const simdjson::padded_string paddedJson(json); + simdjson::ondemand::parser parser; + try { + auto doc = parser.iterate(paddedJson); + if (auto status = doc.error(); status != simdjson::SUCCESS) { + return TConclusionStatus::Fail(simdjson::error_message(status)); + } + if (auto status = SimdJsonToJsonIndex(doc.value_unsafe(), callbacks); status != simdjson::SUCCESS) { + return TConclusionStatus::Fail(simdjson::error_message(status)); + } + } catch (const simdjson::simdjson_error& e) { + return TConclusionStatus::Fail(e.what()); } TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); return std::move(serializer).Serialize(); - } -TMaybe SerializeToBinaryJson(const TStringBuf json) { +TConclusion SerializeToBinaryJson(const TStringBuf json) { return SerializeToBinaryJsonImpl(json); } diff --git a/ydb/library/binary_json/write.h b/ydb/library/binary_json/write.h index f1d4dad7cdc1..814f4a549d56 100644 --- a/ydb/library/binary_json/write.h +++ b/ydb/library/binary_json/write.h @@ -2,6 +2,7 @@ #include "format.h" +#include #include #include @@ -11,7 +12,7 @@ namespace NKikimr::NBinaryJson { /** * @brief Translates textual JSON into BinaryJson */ -TMaybe SerializeToBinaryJson(const TStringBuf json); +TConclusion SerializeToBinaryJson(const TStringBuf json); /** * @brief Translates DOM layout from `yql/library/dom` library into BinaryJson diff --git a/ydb/library/binary_json/ya.make b/ydb/library/binary_json/ya.make index 93b3032fd223..5d8b70d56e97 100644 --- a/ydb/library/binary_json/ya.make +++ b/ydb/library/binary_json/ya.make @@ -7,8 +7,11 @@ YQL_ABI_VERSION( ) PEERDIR( + library/cpp/containers/absl_flat_hash library/cpp/json + ydb/library/conclusion ydb/library/yql/minikql/dom + contrib/libs/simdjson ) SRCS( @@ -23,4 +26,5 @@ END() RECURSE_FOR_TESTS( ut + ut_benchmark ) diff --git a/ydb/library/conclusion/generic/result.h b/ydb/library/conclusion/generic/result.h new file mode 100644 index 000000000000..b0d93d3a404d --- /dev/null +++ b/ydb/library/conclusion/generic/result.h @@ -0,0 +1,112 @@ +#pragma once +#include +#include + +#include +#include + +namespace NKikimr { + +template +class TConclusionImpl { +private: + std::variant Result; + +public: + TConclusionImpl(TStatus&& status) + : Result(std::move(status)) { + auto* resStatus = std::get_if(&Result); + Y_ABORT_UNLESS(resStatus->IsFail()); + } + + bool IsFail() const { + return std::holds_alternative(Result); + } + + bool IsSuccess() const { + return std::holds_alternative(Result); + } + + TConclusionImpl(const TStatus& status) + : Result(status) { + Y_ABORT_UNLESS(IsFail()); + } + + template + TConclusionImpl(TResultArg&& result) + : Result(std::move(result)) { + } + + template + TConclusionImpl(const TResultArg& result) + : Result(result) { + } + + template + TConclusionImpl(TResultArg& result) + : Result(result) { + } + + const TStatus& GetError() const { + auto result = std::get_if(&Result); + Y_ABORT_UNLESS(result, "incorrect object for error request"); + return *result; + } + + const TResult& GetResult() const { + auto result = std::get_if(&Result); + Y_ABORT_UNLESS(result, "incorrect object for result request"); + return *result; + } + + TResult& MutableResult() { + auto result = std::get_if(&Result); + Y_ABORT_UNLESS(result, "incorrect object for result request"); + return *result; + } + + TResult&& DetachResult() { + auto result = std::get_if(&Result); + Y_ABORT_UNLESS(result, "incorrect object for result request: %s", GetErrorMessage().data()); + return std::move(*result); + } + + const TResult* operator->() const { + return &GetResult(); + } + + TResult* operator->() { + return &MutableResult(); + } + + const TResult& operator*() const { + return GetResult(); + } + + bool operator!() const { + return IsFail(); + } + + operator TStatus() const { + return GetError(); + } + + const TString& GetErrorMessage() const { + auto* status = std::get_if(&Result); + if (!status) { + return Default(); + } else { + return status->GetErrorMessage(); + } + } + + auto GetStatus() const { + auto* status = std::get_if(&Result); + if (!status) { + return TStatus::Success().GetStatus(); + } else { + return status->GetStatus(); + } + } +}; +} diff --git a/ydb/library/conclusion/generic/status.h b/ydb/library/conclusion/generic/status.h new file mode 100644 index 000000000000..26be88712b50 --- /dev/null +++ b/ydb/library/conclusion/generic/status.h @@ -0,0 +1,94 @@ +#pragma once + +#include +#include + +#include + +namespace NKikimr { + +template +class TConclusionStatusImpl { +private: + std::optional ErrorMessage; + TStatus Status = StatusOk; + TConclusionStatusImpl() = default; + TConclusionStatusImpl(const TString& errorMessage, TStatus status = DefaultError) + : ErrorMessage(errorMessage) + , Status(status) { + Y_ABORT_UNLESS(!!ErrorMessage); + } + + TConclusionStatusImpl(const char* errorMessage, TStatus status = DefaultError) + : ErrorMessage(errorMessage) + , Status(status) { + Y_ABORT_UNLESS(!!ErrorMessage); + } + + TConclusionStatusImpl(const std::string& errorMessage, TStatus status = DefaultError) + : ErrorMessage(TString(errorMessage.data(), errorMessage.size())) + , Status(status) { + Y_ABORT_UNLESS(!!ErrorMessage); + } + +public: + void Validate(const TString& processInfo = Default()) const { + if (processInfo) { + Y_ABORT_UNLESS(Ok(), "error=%s, processInfo=%s", GetErrorMessage().c_str(), processInfo.c_str()); + } else { + Y_ABORT_UNLESS(Ok(), "error=%s", GetErrorMessage().c_str()); + } + } + + [[nodiscard]] const TString& GetErrorMessage() const { + return ErrorMessage ? *ErrorMessage : Default(); + } + + [[nodiscard]] TStatus GetStatus() const { + return Status; + } + + [[nodiscard]] static TConclusionStatusImpl Fail(const char* errorMessage) { + return TConclusionStatusImpl(errorMessage); + } + + [[nodiscard]] static TConclusionStatusImpl Fail(const TString& errorMessage) { + return TConclusionStatusImpl(errorMessage); + } + + [[nodiscard]] static TConclusionStatusImpl Fail(const std::string& errorMessage) { + return TConclusionStatusImpl(errorMessage); + } + + [[nodiscard]] static TConclusionStatusImpl Fail(const TStatus& status, const char* errorMessage) { + Y_ABORT_UNLESS(DefaultError == StatusOk || status != StatusOk); + return TConclusionStatusImpl(errorMessage, status); + } + + [[nodiscard]] static TConclusionStatusImpl Fail(const TStatus& status, const TString& errorMessage) { + Y_ABORT_UNLESS(DefaultError == StatusOk || status != StatusOk); + return TConclusionStatusImpl(errorMessage, status); + } + + [[nodiscard]] bool IsFail() const { + return !Ok(); + } + + [[nodiscard]] bool IsSuccess() const { + return Ok(); + } + + [[nodiscard]] bool Ok() const { + return !ErrorMessage; + } + + [[nodiscard]] bool operator!() const { + return !!ErrorMessage; + } + + [[nodiscard]] static TConclusionStatusImpl Success() { + return TConclusionStatusImpl(); + } +}; + +} // namespace NKikimr diff --git a/ydb/library/conclusion/generic/ya.make b/ydb/library/conclusion/generic/ya.make new file mode 100644 index 000000000000..1e614b2bfb5c --- /dev/null +++ b/ydb/library/conclusion/generic/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +SRCS() + +PEERDIR( + util +) + +END() diff --git a/ydb/library/conclusion/result.h b/ydb/library/conclusion/result.h index 3e0cde0c7da2..2839bb18cd22 100644 --- a/ydb/library/conclusion/result.h +++ b/ydb/library/conclusion/result.h @@ -1,111 +1,11 @@ #pragma once #include "status.h" -#include -#include -namespace NKikimr { - -template -class TConclusion { -private: - std::variant Result; -public: - - TConclusion(TConclusionStatus&& status) - : Result(std::move(status)) { - auto* resStatus = std::get_if(&Result); - Y_ABORT_UNLESS(resStatus->IsFail()); - } - - bool IsFail() const { - return std::get_if(&Result); - } - - bool IsSuccess() const { - return std::get_if(&Result); - } - - TConclusion(const TConclusionStatus& status) - : Result(status) { - Y_ABORT_UNLESS(IsFail()); - } - - template - TConclusion(TResultArg&& result) - : Result(std::move(result)) { - } - - template - TConclusion(const TResultArg& result) - : Result(result) { - } - - template - TConclusion(TResultArg& result) - : Result(result) { - } - - const TConclusionStatus& GetError() const { - auto result = std::get_if(&Result); - Y_ABORT_UNLESS(result, "incorrect object for error request"); - return *result; - } +#include - const TResult& GetResult() const { - auto result = std::get_if(&Result); - Y_ABORT_UNLESS(result, "incorrect object for result request"); - return *result; - } - - TResult& MutableResult() { - auto result = std::get_if(&Result); - Y_ABORT_UNLESS(result, "incorrect object for result request"); - return *result; - } - - TResult&& DetachResult() { - auto result = std::get_if(&Result); - Y_ABORT_UNLESS(result, "incorrect object for result request: %s", GetErrorMessage().data()); - return std::move(*result); - } - - const TResult* operator->() const { - return &GetResult(); - } - - TResult* operator->() { - return &MutableResult(); - } - - const TResult& operator*() const { - return GetResult(); - } - - bool operator!() const { - return IsFail(); - } - - operator TConclusionStatus() const { - return GetError(); - } - - const TString& GetErrorMessage() const { - auto* status = std::get_if(&Result); - if (!status) { - return Default(); - } else { - return status->GetErrorMessage(); - } - } +namespace NKikimr { - Ydb::StatusIds::StatusCode GetStatus() const { - auto* status = std::get_if(&Result); - if (!status) { - return Ydb::StatusIds::SUCCESS; - } else { - return status->GetStatus(); - } - } -}; +template +using TConclusion = TConclusionImpl; -} +} // namespace NKikimr diff --git a/ydb/library/conclusion/status.cpp b/ydb/library/conclusion/status.cpp index bc355d9ff02b..e946a122914b 100644 --- a/ydb/library/conclusion/status.cpp +++ b/ydb/library/conclusion/status.cpp @@ -1,14 +1,5 @@ #include "status.h" -#include namespace NKikimr { -void TConclusionStatus::Validate(const TString& processInfo) const { - if (processInfo) { - AFL_VERIFY(Ok())("problem", GetErrorMessage())("process_info", processInfo); - } else { - AFL_VERIFY(Ok())("problem", GetErrorMessage()); - } -} - } diff --git a/ydb/library/conclusion/status.h b/ydb/library/conclusion/status.h index 8af77479de13..b6a0830cf7a6 100644 --- a/ydb/library/conclusion/status.h +++ b/ydb/library/conclusion/status.h @@ -1,145 +1,11 @@ #pragma once -#include - -#include -#include +#include namespace NKikimr { -class TConclusionStatus { -private: - std::optional ErrorMessage; - Ydb::StatusIds::StatusCode Status = Ydb::StatusIds::SUCCESS; - TConclusionStatus() = default; - TConclusionStatus(const TString& errorMessage, Ydb::StatusIds::StatusCode status = Ydb::StatusIds::INTERNAL_ERROR) - : ErrorMessage(errorMessage) - , Status(status) - { - Y_ABORT_UNLESS(!!ErrorMessage); - } - - TConclusionStatus(const char* errorMessage, Ydb::StatusIds::StatusCode status = Ydb::StatusIds::INTERNAL_ERROR) - : ErrorMessage(errorMessage) - , Status(status) { - Y_ABORT_UNLESS(!!ErrorMessage); - } - - TConclusionStatus(const std::string& errorMessage, Ydb::StatusIds::StatusCode status = Ydb::StatusIds::INTERNAL_ERROR) - : ErrorMessage(TString(errorMessage.data(), errorMessage.size())) - , Status(status) { - Y_ABORT_UNLESS(!!ErrorMessage); - } -public: - void Validate(const TString& processInfo = Default()) const; - - [[nodiscard]] const TString& GetErrorMessage() const { - return ErrorMessage ? *ErrorMessage : Default(); - } - - [[nodiscard]] Ydb::StatusIds::StatusCode GetStatus() const { - return Status; - } - - [[nodiscard]] static TConclusionStatus Fail(const char* errorMessage) { - return TConclusionStatus(errorMessage); - } - - [[nodiscard]] static TConclusionStatus Fail(const TString& errorMessage) { - return TConclusionStatus(errorMessage); - } - - [[nodiscard]] static TConclusionStatus Fail(const std::string& errorMessage) { - return TConclusionStatus(errorMessage); - } - - [[nodiscard]] bool IsFail() const { - return !Ok(); - } - - [[nodiscard]] bool IsSuccess() const { - return Ok(); - } - - [[nodiscard]] bool Ok() const { - return !ErrorMessage; - } - - [[nodiscard]] bool operator!() const { - return !!ErrorMessage; - } - - [[nodiscard]] static TConclusionStatus Success() { - return TConclusionStatus(); - } -}; - -template -class TConclusionSpecialStatus { -private: - std::optional ErrorMessage; - TStatus SpecialStatus = StatusOk; - - TConclusionSpecialStatus() = default; - TConclusionSpecialStatus(const TStatus& status, const std::optional& errorMessage = {}) - : ErrorMessage(errorMessage) - , SpecialStatus(status) - { - Y_ABORT_UNLESS(!!ErrorMessage); - } - - TConclusionSpecialStatus(const TStatus& status,const char* errorMessage) - : ErrorMessage(errorMessage) - , SpecialStatus(status) - { - Y_ABORT_UNLESS(!!ErrorMessage); - } -public: - - const TString& GetErrorMessage() const { - return ErrorMessage ? *ErrorMessage : Default(); - } - - static TConclusionSpecialStatus Fail(const char* errorMessage) { - return Fail(DefaultError, errorMessage); - } - - static TConclusionSpecialStatus Fail(const TString& errorMessage) { - return Fail(DefaultError, errorMessage); - } - - static TConclusionSpecialStatus Fail(const TStatus& status, const char* errorMessage) { - Y_ABORT_UNLESS(status != StatusOk); - return TConclusionSpecialStatus(status, errorMessage); - } - - static TConclusionSpecialStatus Fail(const TStatus& status, const TString& errorMessage) { - Y_ABORT_UNLESS(status != StatusOk); - return TConclusionSpecialStatus(status, errorMessage); - } - - const TStatus& GetStatus() const { - return SpecialStatus; - } - - bool IsFail() const { - return !Ok(); - } - - bool Ok() const { - return SpecialStatus == StatusOk; - } - - bool operator!() const { - return !Ok(); - } - - explicit operator bool() const { - return Ok(); - } +using TConclusionStatus = TConclusionStatusImpl<::TNull, ::TNull{}, ::TNull{}>; - static TConclusionSpecialStatus Success() { - return TConclusionSpecialStatus(); - } -}; +template +using TConclusionSpecialStatus = TConclusionStatusImpl; } diff --git a/ydb/library/conclusion/ya.make b/ydb/library/conclusion/ya.make index e6e350a5a55a..41cf1944ba17 100644 --- a/ydb/library/conclusion/ya.make +++ b/ydb/library/conclusion/ya.make @@ -6,8 +6,8 @@ SRCS( ) PEERDIR( - ydb/public/api/protos - ydb/library/actors/core + util + ydb/library/conclusion/generic ) END() diff --git a/ydb/library/formats/arrow/accessor/abstract/ya.make b/ydb/library/formats/arrow/accessor/abstract/ya.make index 418ad9316912..03f6c83afa07 100644 --- a/ydb/library/formats/arrow/accessor/abstract/ya.make +++ b/ydb/library/formats/arrow/accessor/abstract/ya.make @@ -5,6 +5,7 @@ PEERDIR( ydb/library/formats/arrow/accessor/common contrib/libs/apache/arrow ydb/library/conclusion + ydb/library/actors/core ) SRCS( diff --git a/ydb/library/mkql_proto/mkql_proto.cpp b/ydb/library/mkql_proto/mkql_proto.cpp index 95fc138d963f..d6b103eab6f9 100644 --- a/ydb/library/mkql_proto/mkql_proto.cpp +++ b/ydb/library/mkql_proto/mkql_proto.cpp @@ -1601,7 +1601,7 @@ Y_FORCE_INLINE NUdf::TUnboxedValue KindDataImport(const TType* type, const Ydb:: case NUdf::TDataType::Id: { CheckTypeId(value.value_case(), Ydb::Value::kTextValue, "JsonDocument"); const auto binaryJson = NBinaryJson::SerializeToBinaryJson(value.text_value()); - if (!binaryJson.Defined()) { + if (binaryJson.IsFail()) { throw yexception() << "Invalid JsonDocument value"; } return MakeString(TStringBuf(binaryJson->Data(), binaryJson->Size())); diff --git a/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_convert.cpp b/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_convert.cpp index 909526e21493..6cbf8efe5c7f 100644 --- a/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_convert.cpp +++ b/ydb/library/yql/minikql/invoke_builtins/mkql_builtins_convert.cpp @@ -573,7 +573,7 @@ struct TStringConvert { NUdf::TUnboxedValuePod JsonToJsonDocument(const NUdf::TUnboxedValuePod value) { auto binaryJson = NKikimr::NBinaryJson::SerializeToBinaryJson(value.AsStringRef()); - if (!binaryJson.Defined()) { + if (!binaryJson.IsSuccess()) { // JSON parse error happened, return NULL return NUdf::TUnboxedValuePod(); } diff --git a/ydb/library/yql/minikql/invoke_builtins/ya.make b/ydb/library/yql/minikql/invoke_builtins/ya.make index fa7a96b61a0f..846c6063f750 100644 --- a/ydb/library/yql/minikql/invoke_builtins/ya.make +++ b/ydb/library/yql/minikql/invoke_builtins/ya.make @@ -3,8 +3,7 @@ LIBRARY() SRCS( ) -PEERDIR( -) +PEERDIR() YQL_LAST_ABI_VERSION() diff --git a/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp b/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp index 087759f769b5..a32389a76898 100644 --- a/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp +++ b/ydb/library/yql/minikql/jsonpath/ut/common_ut.cpp @@ -339,7 +339,7 @@ class TJsonPathCommonTest : public TJsonPathTestBase { "array": [1, 2, 3, 4] })", "$.array[+$.range.from to +$.range.to]", {"2", "3"}}, {R"([1, 2, 3])", "-$[*]", {"-1", "-2", "-3"}}, - {"10000000000000000000000000", "-$", {"-9.999999999999999e+24"}}, + {"30000000000000000000000000", "-$", {"-3e+25"}}, }; for (const auto& testCase : testCases) { diff --git a/ydb/library/yql/minikql/mkql_type_ops.cpp b/ydb/library/yql/minikql/mkql_type_ops.cpp index be4867ed7265..76b5714897b5 100644 --- a/ydb/library/yql/minikql/mkql_type_ops.cpp +++ b/ydb/library/yql/minikql/mkql_type_ops.cpp @@ -2518,7 +2518,7 @@ NUdf::TUnboxedValuePod ValueFromString(NUdf::EDataSlot type, NUdf::TStringRef bu case NUdf::EDataSlot::JsonDocument: { auto binaryJson = NKikimr::NBinaryJson::SerializeToBinaryJson(buf); - if (!binaryJson.Defined()) { + if (binaryJson.IsFail()) { // JSON parse error happened, return NULL return NUdf::TUnboxedValuePod(); } diff --git a/ydb/library/yql/providers/yt/codec/codegen/yt_codec_cg.cpp b/ydb/library/yql/providers/yt/codec/codegen/yt_codec_cg.cpp index f31974650f91..f24fb217a535 100644 --- a/ydb/library/yql/providers/yt/codec/codegen/yt_codec_cg.cpp +++ b/ydb/library/yql/providers/yt/codec/codegen/yt_codec_cg.cpp @@ -67,7 +67,7 @@ extern "C" void YtCodecReadJsonDocument(void* vbuf, void* vpod) { buf.ReadMany(json.AsStringRef().Data(), size); const auto binaryJson = NBinaryJson::SerializeToBinaryJson(json.AsStringRef()); - if (!binaryJson.Defined()) { + if (binaryJson.IsFail()) { YQL_ENSURE(false, "Invalid JSON stored for JsonDocument type"); } diff --git a/ydb/services/metadata/request/request_actor_cb.h b/ydb/services/metadata/request/request_actor_cb.h index 07bea21c9992..71792903f7a3 100644 --- a/ydb/services/metadata/request/request_actor_cb.h +++ b/ydb/services/metadata/request/request_actor_cb.h @@ -10,7 +10,9 @@ #include #include #include -#include +#include +#include +#include namespace NKikimr::NMetadata::NRequest { @@ -64,15 +66,18 @@ class IChainController: public IExternalController { std::shared_ptr NextController; const NACLib::TUserToken UserToken; protected: - TConclusion BuildNextRequest(typename TCurrentDialogPolicy::TResponse&& result) const { + using TYdbConclusionStatus = TConclusionSpecialStatus; + using TRequestConclusion = TConclusionImpl; + + TRequestConclusion BuildNextRequest(typename TCurrentDialogPolicy::TResponse&& result) const { return DoBuildNextRequest(std::move(result)); } - virtual TConclusion DoBuildNextRequest(typename TCurrentDialogPolicy::TResponse&& result) const = 0; + virtual TRequestConclusion DoBuildNextRequest(typename TCurrentDialogPolicy::TResponse&& result) const = 0; public: using TDialogPolicy = TCurrentDialogPolicy; virtual void OnRequestResult(typename TCurrentDialogPolicy::TResponse&& result) override { - TConclusion nextRequest = BuildNextRequest(std::move(result)); + TRequestConclusion nextRequest = BuildNextRequest(std::move(result)); if (!nextRequest) { OnRequestFailed(nextRequest.GetStatus(), nextRequest.GetErrorMessage()); } else { @@ -113,14 +118,14 @@ class TSessionedChainController: public IChainController DoBuildNextRequest(TDialogCreateSession::TResponse&& response) const override { + virtual TBase::TRequestConclusion DoBuildNextRequest(TDialogCreateSession::TResponse&& response) const override { auto result = ProtoRequest; Ydb::Table::CreateSessionResponse currentFullReply = std::move(response); Ydb::Table::CreateSessionResult session; currentFullReply.operation().result().UnpackTo(&session); const TString sessionId = session.session_id(); if (!sessionId) { - return TConclusionStatus::Fail("cannot build session for request"); + return TBase::TYdbConclusionStatus::Fail("cannot build session for request"); } result.set_session_id(sessionId); SessionContext->SetSessionId(sessionId);