From 70e152498ba62d8086fe98dfeca35b3b39584ef5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 8 Sep 2024 17:01:45 +0300 Subject: [PATCH 01/69] move --- ydb/core/kqp/common/buffer/events.cpp | 16 + ydb/core/kqp/common/buffer/events.h | 62 + ydb/core/kqp/common/buffer/ya.make | 14 + ydb/core/kqp/common/simple/kqp_event_ids.h | 15 + ydb/core/kqp/executer_actor/kqp_planner.cpp | 28 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 1384 +++++++++++++++--- ydb/core/kqp/runtime/kqp_write_actor.h | 33 + ydb/core/kqp/runtime/kqp_write_table.cpp | 301 ++-- ydb/core/kqp/runtime/kqp_write_table.h | 35 +- ydb/core/kqp/session_actor/kqp_query_state.h | 2 +- ydb/core/protos/kqp.proto | 13 +- 11 files changed, 1563 insertions(+), 340 deletions(-) create mode 100644 ydb/core/kqp/common/buffer/events.cpp create mode 100644 ydb/core/kqp/common/buffer/events.h create mode 100644 ydb/core/kqp/common/buffer/ya.make diff --git a/ydb/core/kqp/common/buffer/events.cpp b/ydb/core/kqp/common/buffer/events.cpp new file mode 100644 index 000000000000..9b10573c4c94 --- /dev/null +++ b/ydb/core/kqp/common/buffer/events.cpp @@ -0,0 +1,16 @@ +#include "events.h" + +namespace NKikimr { +namespace NKqp { + +TEvKqpBuffer::TEvError::TEvError( + const TString& message, + NYql::NDqProto::StatusIds::StatusCode statusCode, + const NYql::TIssues& subIssues) + : Message(message) + , StatusCode(statusCode) + , SubIssues(subIssues) { +} + +} +} diff --git a/ydb/core/kqp/common/buffer/events.h b/ydb/core/kqp/common/buffer/events.h new file mode 100644 index 000000000000..48a845dc6e1e --- /dev/null +++ b/ydb/core/kqp/common/buffer/events.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include + + +namespace NKikimr { +namespace NKqp { + +struct TPrepareSettings { + ui64 TxId; + THashSet SendingShards; + THashSet ReceivingShards; + std::optional ArbiterShard; +}; + +struct TPreparedInfo { + ui64 ShardId; + ui64 MinStep; + ui64 MaxStep; + TVector Coordinators; +}; + +struct TEvKqpBuffer { + +struct 
TEvPrepare : public TEventLocal { + TPrepareSettings Settings; +}; + +struct TEvPrepared : public TEventLocal { + TPreparedInfo Result; +}; + +struct TEvCommit : public TEventLocal { +}; + +struct TEvCommitted : public TEventLocal { + ui64 ShardId; +}; + +struct TEvRollback : public TEventLocal { +}; + +struct TEvFlush : public TEventLocal { +}; + +struct TEvError : public TEventLocal { + TString Message; + NYql::NDqProto::StatusIds::StatusCode StatusCode; + NYql::TIssues SubIssues; + + TEvError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues); +}; + +struct TEvTerminate : public TEventLocal { +}; + +}; + +} +} diff --git a/ydb/core/kqp/common/buffer/ya.make b/ydb/core/kqp/common/buffer/ya.make new file mode 100644 index 000000000000..c4fb712d297f --- /dev/null +++ b/ydb/core/kqp/common/buffer/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + events.cpp +) + +PEERDIR( + ydb/core/kqp/common/simple + ydb/library/yql/public/issue +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/kqp/common/simple/kqp_event_ids.h b/ydb/core/kqp/common/simple/kqp_event_ids.h index f6989d5b69bd..da1d3e811198 100644 --- a/ydb/core/kqp/common/simple/kqp_event_ids.h +++ b/ydb/core/kqp/common/simple/kqp_event_ids.h @@ -46,6 +46,8 @@ struct TKqpEvents { EvListProxyNodesResponse, EvUpdateDatabaseInfo, EvDelayedRequestError + EvBufferWrite, + EvBufferWriteResult, }; static_assert (EvCompileInvalidateRequest + 1 == EvAbortExecution); @@ -181,5 +183,18 @@ struct TKqpWorkloadServiceEvents { }; }; +struct TKqpBufferWriterEvents { + enum EKqpBufferWriterEvents { + EvPrepare = EventSpaceBegin(TKikimrEvents::ES_KQP) + 800, + EvPrepared, + EvCommit, + EvCommitted, + EvRollback, + EvFlush, + EvError, + EvTerminate, + }; +}; + } // namespace NKqp } // namespace NKikimr diff --git a/ydb/core/kqp/executer_actor/kqp_planner.cpp b/ydb/core/kqp/executer_actor/kqp_planner.cpp index 94a6d6992fec..a508b695df0b 100644 --- 
a/ydb/core/kqp/executer_actor/kqp_planner.cpp +++ b/ydb/core/kqp/executer_actor/kqp_planner.cpp @@ -52,6 +52,23 @@ void BuildInitialTaskResources(const TKqpTasksGraph& graph, ui64 taskId, TTaskRe ret.HeavyProgram = opts.GetHasMapJoin(); } +bool NeedToRunLocally(const TTask& task) { + for (const auto& output : task.Outputs) { + if (output.Type == TTaskOutputType::Sink && output.SinkType == KqpTableSinkName) { + YQL_ENSURE(output.SinkSettings); + const google::protobuf::Any& settingsAny = *output.SinkSettings; + YQL_ENSURE(settingsAny.Is()); + NKikimrKqp::TKqpTableSinkSettings settings; + YQL_ENSURE(settingsAny.UnpackTo(&settings)); + if (ActorIdFromProto(settings.GetBufferActorId())) { + // We need to run compute actor locally if it uses buffer actor. + return true; + } + } + } + return false; +} + bool LimitCPU(TIntrusivePtr ctx) { return ctx->PoolId && ctx->PoolConfig.has_value() && ctx->PoolConfig->TotalCpuLimitPercentPerNode > 0; } @@ -420,7 +437,12 @@ std::unique_ptr TKqpPlanner::AssignTasksToNodes() { for(ui64 taskId: group.TaskIds) { auto [it, success] = alreadyAssigned.emplace(taskId, group.NodeId); if (success) { - TasksPerNode[group.NodeId].push_back(taskId); + if (NeedToRunLocally(TasksGraph.GetTask(taskId))) { + const ui64 selfNodeId = ExecuterId.NodeId(); + TasksPerNode[selfNodeId].push_back(taskId); + } else { + TasksPerNode[group.NodeId].push_back(taskId); + } } } } @@ -466,7 +488,7 @@ TString TKqpPlanner::ExecuteDataComputeTask(ui64 taskId, ui32 computeTasksSize) .WithSpilling = WithSpilling, .StatsMode = GetDqStatsMode(StatsMode), .Deadline = Deadline, - .ShareMailbox = (computeTasksSize <= 1), + .ShareMailbox = (computeTasksSize <= 1) || NeedToRunLocally(task), .RlPath = Nothing(), .BlockTrackingMode = BlockTrackingMode }); @@ -524,7 +546,7 @@ std::unique_ptr TKqpPlanner::PlanExecution() { ComputeTasks.clear(); } - if (nComputeTasks == 0 && TasksPerNode.size() == 1 && (AsyncIoFactory != nullptr) && AllowSinglePartitionOpt) { + if ((nComputeTasks == 
0 && TasksPerNode.size() == 1 && (AsyncIoFactory != nullptr) && AllowSinglePartitionOpt)) { // query affects a single key or shard, so it might be more effective // to execute this task locally so we can avoid useless overhead for remote task launching. for (auto& [shardId, tasks]: TasksPerNode) { diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index a0888ae65b5e..73d7b6df1205 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -15,12 +15,12 @@ #include #include #include -#include #include #include #include #include #include +#include namespace { @@ -58,43 +58,69 @@ namespace { private: std::optional Lock; }; + + class TLocksManager { + public: + void AddLock(ui64 shardId, const NKikimrDataEvents::TLock& lock) { + Locks[shardId].AddAndCheckLock(lock); + } + + const std::optional& GetLock(ui64 shardId) { + return Locks[shardId].GetLock(); + } + + const THashMap& GetLocks() const { + return Locks; + } + + private: + THashMap Locks; + }; + + NKikimrDataEvents::TEvWrite::TOperation::EOperationType GetOperation(NKikimrKqp::TKqpTableSinkSettings::EType type) { + switch (type) { + case NKikimrKqp::TKqpTableSinkSettings::MODE_REPLACE: + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE; + case NKikimrKqp::TKqpTableSinkSettings::MODE_UPSERT: + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT; + case NKikimrKqp::TKqpTableSinkSettings::MODE_INSERT: + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT; + case NKikimrKqp::TKqpTableSinkSettings::MODE_DELETE: + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_DELETE; + case NKikimrKqp::TKqpTableSinkSettings::MODE_UPDATE: + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE; + default: + return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UNSPECIFIED; + } + } } namespace NKikimr { namespace NKqp { -class TKqpDirectWriteActor : public TActorBootstrapped, 
public NYql::NDq::IDqComputeActorAsyncOutput { - using TBase = TActorBootstrapped; +struct IKqpTableWriterCallbacks { + virtual ~IKqpTableWriterCallbacks() = default; - class TResumeNotificationManager { - public: - TResumeNotificationManager(TKqpDirectWriteActor& writer) - : Writer(writer) { - CheckMemory(); - } + virtual void OnReady(const TTableId& tableId) = 0; - void CheckMemory() { - const auto freeSpace = Writer.GetFreeSpace(); - const auto targetMemory = Writer.MemoryLimit / 2; - if (freeSpace >= targetMemory && targetMemory > LastFreeMemory) { - YQL_ENSURE(freeSpace > 0); - Writer.ResumeExecution(); - } - LastFreeMemory = freeSpace; - } + virtual void OnPrepared(TPreparedInfo&& preparedInfo) = 0; - private: - TKqpDirectWriteActor& Writer; - i64 LastFreeMemory = std::numeric_limits::max(); - }; + //virtual void OnCommitted(ui64 shardId) = 0; + + virtual void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) = 0; - friend class TResumeNotificationManager; + virtual void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) = 0; +}; + +class TKqpTableWriteActor : public TActorBootstrapped { + using TBase = TActorBootstrapped; struct TEvPrivate { enum EEv { EvShardRequestTimeout = EventSpaceBegin(TKikimrEvents::ES_PRIVATE), EvResolveRequestPlanned, + EvTerminate, }; struct TEvShardRequestTimeout : public TEventLocal { @@ -107,112 +133,155 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu struct TEvResolveRequestPlanned : public TEventLocal { }; + + struct TEvTerminate : public TEventLocal { + }; + }; + + enum class EMode { + UNSPECIFIED, + FLUSH, + PREPARE, + COMMIT, + IMMEDIATE_COMMIT, }; public: - TKqpDirectWriteActor( - NKikimrKqp::TKqpTableSinkSettings&& settings, - NYql::NDq::TDqAsyncIoFactory::TSinkArguments&& args, - TIntrusivePtr counters) - : LogPrefix(TStringBuilder() << "TxId: " << args.TxId << ", task: " << args.TaskId << ". 
") - , Settings(std::move(settings)) - , MessageSettings(GetWriteActorSettings()) - , OutputIndex(args.OutputIndex) - , Callbacks(args.Callback) - , Counters(counters) - , TypeEnv(args.TypeEnv) - , Alloc(args.Alloc) - , TxId(args.TxId) - , TableId( - Settings.GetTable().GetOwnerId(), - Settings.GetTable().GetTableId(), - Settings.GetTable().GetVersion()) - , FinalTx( - Settings.GetFinalTx()) - , ImmediateTx( - Settings.GetImmediateTx()) - , InconsistentTx( - Settings.GetInconsistentTx()) - , MemoryLimit(MessageSettings.InFlightMemoryLimitPerActorBytes) + TKqpTableWriteActor( + IKqpTableWriterCallbacks* callbacks, + const TTableId& tableId, + const TStringBuf tablePath, + const ui64 lockTxId, + const ui64 lockNodeId, + const bool inconsistentTx, + const NMiniKQL::TTypeEnvironment& typeEnv, + std::shared_ptr alloc) + : TypeEnv(typeEnv) + , Alloc(alloc) + , TableId(tableId) + , TablePath(tablePath) + , LockTxId(lockTxId) + , LockNodeId(lockNodeId) + , InconsistentTx(inconsistentTx) + , Callbacks(callbacks) { - YQL_ENSURE(std::holds_alternative(TxId)); - YQL_ENSURE(!ImmediateTx); - EgressStats.Level = args.StatsLevel; + try { + ShardedWriteController = CreateShardedWriteController( + TShardedWriteControllerSettings { + .MemoryLimitTotal = kInFlightMemoryLimitPerActor, + .MemoryLimitPerMessage = kMemoryLimitPerMessage, + .MaxBatchesPerMessage = (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable + ? 1 + : kMaxBatchesPerMessage), + }, + TypeEnv, + Alloc); + } catch (...) 
{ + RuntimeError( + CurrentExceptionMessage(), + NYql::NDqProto::StatusIds::INTERNAL_ERROR); + } } void Bootstrap() { LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; ResolveTable(); - Become(&TKqpDirectWriteActor::StateFunc); + Become(&TKqpTableWriteActor::StateProcessing); } - static constexpr char ActorName[] = "KQP_WRITE_ACTOR"; + static constexpr char ActorName[] = "KQP_TABLE_WRITE_ACTOR"; -private: - virtual ~TKqpDirectWriteActor() { + i64 GetMemory() const { + return IsReady() + ? ShardedWriteController->GetMemory() + : 0; } - void CommitState(const NYql::NDqProto::TCheckpoint&) final {}; - void LoadState(const NYql::NDq::TSinkState&) final {}; + bool IsReady() const { + return ShardedWriteController->IsReady(); + } - ui64 GetOutputIndex() const final { - return OutputIndex; + const THashMap& GetLocks() const { + return LocksManager.GetLocks(); } - const NYql::NDq::TDqAsyncStats& GetEgressStats() const final { - return EgressStats; + TVector GetShardsIds() const { + return (!ShardedWriteController) + ? TVector() + : ShardedWriteController->GetShardsIds(); } - i64 GetFreeSpace() const final { - const i64 result = (ShardedWriteController && !IsResolving()) - ? MemoryLimit - ShardedWriteController->GetMemory() - : std::numeric_limits::min(); // Can't use zero here because compute can use overcommit! - return result; + std::optional GetShardsCount() const { + return (InconsistentTx || !ShardedWriteController) + ? 
std::nullopt + : std::optional(ShardedWriteController->GetShardsCount()); } - TMaybe ExtraData() override { - NKikimrKqp::TEvKqpOutputActorResultInfo resultInfo; - for (const auto& [_, lockInfo] : LocksInfo) { - if (const auto& lock = lockInfo.GetLock(); lock) { - resultInfo.AddLocks()->CopyFrom(*lock); - } - } - google::protobuf::Any result; - result.PackFrom(resultInfo); - return result; + // void Commit(bool immediate) {} + + using TWriteToken = IShardedWriteController::TWriteToken; + + TWriteToken Open( + NKikimrDataEvents::TEvWrite::TOperation::EOperationType operationType, + TVector&& columnsMetadata) { + YQL_ENSURE(!Closed); + auto token = ShardedWriteController->Open( + TableId, + operationType, + std::move(columnsMetadata)); + return token; } - void SendData(NMiniKQL::TUnboxedValueBatch&& data, i64 size, const TMaybe&, bool finished) final { + void Write(TWriteToken token, const NMiniKQL::TUnboxedValueBatch& data) { YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); - YQL_ENSURE(!Finished); - Finished = finished; - EgressStats.Resume(); - - CA_LOG_D("New data: size=" << size << ", finished=" << finished << ", used memory=" << ShardedWriteController->GetMemory() << "."); + YQL_ENSURE(!Closed); + YQL_ENSURE(ShardedWriteController); + try { + ShardedWriteController->Write(token, data); + } catch (...) { + RuntimeError( + CurrentExceptionMessage(), + NYql::NDqProto::StatusIds::INTERNAL_ERROR); + } + } + void Close(TWriteToken token) { + YQL_ENSURE(!Closed); YQL_ENSURE(ShardedWriteController); try { - ShardedWriteController->AddData(std::move(data)); - if (Finished) { - ShardedWriteController->Close(); - } + ShardedWriteController->Close(token); } catch (...) 
{ RuntimeError( CurrentExceptionMessage(), NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - ProcessBatches(); } - STFUNC(StateFunc) { + void Close() { + YQL_ENSURE(!Closed); + YQL_ENSURE(ShardedWriteController); + YQL_ENSURE(ShardedWriteController->IsAllWritesClosed()); + Closed = true; + ShardedWriteController->Close(); + } + + bool IsClosed() const { + return Closed; + } + + bool IsFinished() const { + return IsClosed() && ShardedWriteController->IsAllWritesFinished(); + } + + STFUNC(StateProcessing) { try { switch (ev->GetTypeRewrite()) { hFunc(NKikimr::NEvents::TDataEvents::TEvWriteResult, Handle); hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); hFunc(TEvTxProxySchemeCache::TEvResolveKeySetResult, Handle); hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); - IgnoreFunc(TEvTxUserProxy::TEvAllocateTxIdResult); hFunc(TEvPrivate::TEvShardRequestTimeout, Handle); + hFunc(TEvPrivate::TEvTerminate, Handle); hFunc(TEvPrivate::TEvResolveRequestPlanned, Handle); IgnoreFunc(TEvInterconnect::TEvNodeConnected); IgnoreFunc(TEvTxProxySchemeCache::TEvInvalidateTableResult); @@ -222,6 +291,10 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu } } + STFUNC(StateTerminating) { + Y_UNUSED(ev); + } + bool IsResolving() const { return ResolveAttempts > 0; } @@ -252,7 +325,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu << "Too many table resolve attempts for table " << TableId << "."); RuntimeError( TStringBuilder() - << "Too many table resolve attempts for table `" << Settings.GetTable().GetPath() << "`.", + << "Too many table resolve attempts for table `" << TablePath << "`.", NYql::NDqProto::StatusIds::SCHEME_ERROR); return; } @@ -294,7 +367,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu } if (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { - YQL_ENSURE(!ImmediateTx); Prepare(); } else { ResolveShards(); @@ -384,7 +456,15 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu return; } case 
NKikimrDataEvents::TEvWriteResult::STATUS_PREPARED: { - YQL_ENSURE(false); + const auto& result = ev->Get()->Record; + TPreparedInfo preparedInfo; + preparedInfo.ShardId = result.GetOrigin(); + preparedInfo.MinStep = result.GetMinStep(); + preparedInfo.MaxStep = result.GetMaxStep(); + preparedInfo.Coordinators = TVector(result.GetDomainCoordinators().begin(), + result.GetDomainCoordinators().end()); + Callbacks->OnPrepared(std::move(preparedInfo)); + return; } case NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED: { ProcessWriteCompletedShard(ev); @@ -534,8 +614,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu return builder; }()); - OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), ev->Cookie); - for (const auto& lock : ev->Get()->Record.GetTxLocks()) { if (!LocksInfo[ev->Get()->Record.GetOrigin()].AddAndCheckLock(lock)) { RuntimeError( @@ -543,93 +621,117 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu << SchemeEntry->TableId.PathId.ToString() << "`.", NYql::NDqProto::StatusIds::ABORTED, NYql::TIssues{}); - } + LocksManager.AddLock(ev->Get()->Record.GetOrigin(), lock); } - ProcessBatches(); + if (Mode == EMode::COMMIT) { + Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), 0, true); + } else { + const auto result = ShardedWriteController->OnMessageAcknowledged( + ev->Get()->Record.GetOrigin(), ev->Cookie); + if (result) { + Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), result->DataSize, result->IsShardEmpty); + } + } } - void OnMessageAcknowledged(ui64 shardId, ui64 cookie) { - TResumeNotificationManager resumeNotificator(*this); - const auto removedDataSize = ShardedWriteController->OnMessageAcknowledged(shardId, cookie); - if (removedDataSize) { - EgressStats.Bytes += *removedDataSize; - EgressStats.Chunks++; - EgressStats.Splits++; - EgressStats.Resume(); + void SetPrepare(ui64 txId) { + Mode = EMode::PREPARE; + TxId = txId; + for (const auto shardId : ShardedWriteController->GetShardsIds()) { + 
const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); + if (!metadata || (metadata->IsLast && metadata->SendAttempts != 0)) { + SendEmptyFinalToShard(shardId); + } } - resumeNotificator.CheckMemory(); } - void ProcessBatches() { - if (!ImmediateTx || Finished || GetFreeSpace() <= 0) { - SendBatchesToShards(); - } + void SetCommit() { + Mode = EMode::COMMIT; + } - if (Finished && ShardedWriteController->IsFinished()) { - CA_LOG_D("Write actor finished"); - Callbacks->OnAsyncOutputFinished(GetOutputIndex()); - } + void SetImmediateCommit(ui64 txId) { + Mode = EMode::IMMEDIATE_COMMIT; + TxId = txId; + // TODO: send data for empty } - void SendBatchesToShards() { + void Flush() { + //Mode = EMode::FLUSH; for (const size_t shardId : ShardedWriteController->GetPendingShards()) { SendDataToShard(shardId); } } + void SendEmptyFinalToShard(const ui64 shardId) { + auto evWrite = std::make_unique( + NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWrite->SetTxId(TxId); + evWrite->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + Send( + PipeCacheId, + new TEvPipeCache::TEvForward(evWrite.release(), shardId, true), + 0, + 0); + } + void SendDataToShard(const ui64 shardId) { const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); YQL_ENSURE(metadata); if (metadata->SendAttempts >= MessageSettings.MaxWriteAttempts) { CA_LOG_E("ShardId=" << shardId - << " for table '" << Settings.GetTable().GetPath() + << " for table '" << TablePath << "': retry limit exceeded." 
<< " Sink=" << this->SelfId() << "."); RuntimeError( TStringBuilder() << "ShardId=" << shardId - << " for table '" << Settings.GetTable().GetPath() + << " for table '" << TablePath << "': retry limit exceeded.", NYql::NDqProto::StatusIds::UNAVAILABLE); return; } - auto evWrite = std::make_unique( - NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + const bool isPrepare = metadata->IsFinal && Mode == EMode::PREPARE; + const bool isImmediateCommit = metadata->IsFinal && Mode == EMode::IMMEDIATE_COMMIT; + + auto evWrite = std::make_unique(); + + evWrite->Record.SetTxMode(isPrepare + ? NKikimrDataEvents::TEvWrite::MODE_PREPARE + : NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); - if (ImmediateTx && FinalTx && Finished && metadata->IsFinal) { - // Last immediate write (only for datashard) - if (LocksInfo[shardId].GetLock()) { + if (Closed && isImmediateCommit) { + evWrite->Record.SetTxId(TxId); + const auto lock = LocksManager.GetLock(shardId); // multi immediate evwrite - auto* locks = evWrite->Record.MutableLocks(); - locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - locks->AddSendingShards(shardId); - locks->AddReceivingShards(shardId); - *locks->AddLocks() = *LocksInfo.at(shardId).GetLock(); + auto* locks = evWrite->Record.MutableLocks(); + locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + //locks->AddSendingShards(shardId); // TODO: other shards + //locks->AddReceivingShards(shardId); + if (lock) { + *locks->AddLocks() = *lock; + } + } else if (Closed && isPrepare) { + evWrite->Record.SetTxId(TxId); + // NOT TRUE:: // Last immediate write (only for datashard) + const auto lock = LocksManager.GetLock(shardId); + // multi immediate evwrite + auto* locks = evWrite->Record.MutableLocks(); + locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + //locks->AddSendingShards(shardId); // TODO: other shards + locks->AddReceivingShards(shardId); + if (lock) { + *locks->AddLocks() = *lock; } } else if (!InconsistentTx) { - evWrite->SetLockId(Settings.GetLockTxId(), 
Settings.GetLockNodeId()); + evWrite->SetLockId(LockTxId, LockNodeId); } const auto serializationResult = ShardedWriteController->SerializeMessageToPayload(shardId, *evWrite); YQL_ENSURE(serializationResult.TotalDataSize > 0); - for (size_t payloadIndex : serializationResult.PayloadIndexes) { - evWrite->AddOperation( - GetOperation(), - { - Settings.GetTable().GetOwnerId(), - Settings.GetTable().GetTableId(), - Settings.GetTable().GetVersion(), - }, - ShardedWriteController->GetWriteColumnIds(), - payloadIndex, - ShardedWriteController->GetDataFormat()); - } - - CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", TxId=" << evWrite->Record.GetTxId() - << ", TxMode=" << evWrite->Record.GetTxMode() + CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", isPrepare=" << isPrepare << ", isImmediateCommit=" << isImmediateCommit << ", TxId=" << evWrite->Record.GetTxId() << ", LockTxId=" << evWrite->Record.GetLockTxId() << ", LockNodeId=" << evWrite->Record.GetLockNodeId() << ", Locks= " << [&]() { TStringBuilder builder; @@ -661,25 +763,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu } } - NKikimrDataEvents::TEvWrite::TOperation::EOperationType GetOperation() { - switch (Settings.GetType()) { - case NKikimrKqp::TKqpTableSinkSettings::MODE_REPLACE: - return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE; - case NKikimrKqp::TKqpTableSinkSettings::MODE_UPSERT: - return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT; - case NKikimrKqp::TKqpTableSinkSettings::MODE_INSERT: - return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT; - case NKikimrKqp::TKqpTableSinkSettings::MODE_DELETE: - return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_DELETE; - case NKikimrKqp::TKqpTableSinkSettings::MODE_UPDATE: - return NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE; - default: - RuntimeError( - TStringBuilder() << "Unknown operation.", - NYql::NDqProto::StatusIds::INTERNAL_ERROR); - } - } - void RetryShard(const ui64 shardId, 
const std::optional ifCookieEqual) { const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); if (!metadata || (ifCookieEqual && metadata->Cookie != ifCookieEqual)) { @@ -701,6 +784,11 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu RetryShard(ev->Get()->ShardId, ev->Cookie); } + void Handle(TEvPrivate::TEvTerminate::TPtr&) { + Become(&TKqpTableWriteActor::StateTerminating); + PassAway(); + } + void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { CA_LOG_W("TEvDeliveryProblem was received from tablet: " << ev->Get()->TabletId); if (InconsistentTx) { @@ -714,109 +802,947 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu } } - void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { - NYql::TIssue issue(message); - for (const auto& i : subIssues) { - issue.AddSubIssue(MakeIntrusive(i)); - } - - NYql::TIssues issues; - issues.AddIssue(std::move(issue)); - - Callbacks->OnAsyncOutputError(OutputIndex, std::move(issues), statusCode); - } - - void PassAway() override { - Send(PipeCacheId, new TEvPipeCache::TEvUnlink(0)); - TActorBootstrapped::PassAway(); - } - void Prepare() { YQL_ENSURE(SchemeEntry); ResolveAttempts = 0; - if (!ShardedWriteController) { - TVector columnsMetadata; - columnsMetadata.reserve(Settings.GetColumns().size()); - for (const auto & column : Settings.GetColumns()) { - columnsMetadata.push_back(column); - } - - try { - ShardedWriteController = CreateShardedWriteController( - TShardedWriteControllerSettings { - .MemoryLimitTotal = MessageSettings.InFlightMemoryLimitPerActorBytes, - .MemoryLimitPerMessage = MessageSettings.MemoryLimitPerMessageBytes, - .MaxBatchesPerMessage = (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable - ? 1 - : MessageSettings.MaxBatchesPerMessage), - }, - std::move(columnsMetadata), - TypeEnv, - Alloc); - } catch (...) 
{ - RuntimeError( - CurrentExceptionMessage(), - NYql::NDqProto::StatusIds::INTERNAL_ERROR); - } - } - try { if (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { - ShardedWriteController->OnPartitioningChanged(*SchemeEntry); + ShardedWriteController->OnPartitioningChanged(std::move(*SchemeEntry)); } else { - ShardedWriteController->OnPartitioningChanged(*SchemeEntry, std::move(*SchemeRequest)); + ShardedWriteController->OnPartitioningChanged(std::move(*SchemeEntry), std::move(*SchemeRequest)); } - ResumeExecution(); + SchemeEntry.reset(); + SchemeRequest.reset(); } catch (...) { RuntimeError( CurrentExceptionMessage(), NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - ProcessBatches(); + + Callbacks->OnReady(TableId); } - void ResumeExecution() { - CA_LOG_D("Resuming execution."); - Callbacks->ResumeExecution(); + void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { + Callbacks->OnError(message, statusCode, subIssues); + } + + void PassAway() override {; + Send(PipeCacheId, new TEvPipeCache::TEvUnlink(0)); + TActorBootstrapped::PassAway(); + } + + void Terminate() { + Send(this->SelfId(), new TEvPrivate::TEvTerminate{}); } NActors::TActorId PipeCacheId = NKikimr::MakePipePerNodeCacheID(false); TString LogPrefix; - const NKikimrKqp::TKqpTableSinkSettings Settings; - TWriteActorSettings MessageSettings; - const ui64 OutputIndex; - NYql::NDq::TDqAsyncStats EgressStats; - NYql::NDq::IDqComputeActorAsyncOutput::ICallbacks * Callbacks = nullptr; - TIntrusivePtr Counters; const NMiniKQL::TTypeEnvironment& TypeEnv; std::shared_ptr Alloc; - const NYql::NDq::TTxId TxId; + ui64 TxId = 0; const TTableId TableId; - const bool FinalTx; - const bool ImmediateTx; + const TString TablePath; + + const ui64 LockTxId; + const ui64 LockNodeId; const bool InconsistentTx; + IKqpTableWriterCallbacks* Callbacks; + std::optional SchemeEntry; std::optional SchemeRequest; ui64 
ResolveAttempts = 0; - THashMap LocksInfo; - bool Finished = false; - - const i64 MemoryLimit; + TLocksManager LocksManager; + bool Closed = false; + EMode Mode = EMode::UNSPECIFIED; IShardedWriteControllerPtr ShardedWriteController = nullptr; }; -void RegisterKqpWriteActor(NYql::NDq::TDqAsyncIoFactory& factory, TIntrusivePtr counters) { - factory.RegisterSink( - TString(NYql::KqpTableSinkName), - [counters] (NKikimrKqp::TKqpTableSinkSettings&& settings, NYql::NDq::TDqAsyncIoFactory::TSinkArguments&& args) { - auto* actor = new TKqpDirectWriteActor(std::move(settings), std::move(args), counters); - return std::make_pair(actor, actor); +class TKqpDirectWriteActor : public TActorBootstrapped, public NYql::NDq::IDqComputeActorAsyncOutput, public IKqpTableWriterCallbacks { + using TBase = TActorBootstrapped; + +public: + TKqpDirectWriteActor( + NKikimrKqp::TKqpTableSinkSettings&& settings, + NYql::NDq::TDqAsyncIoFactory::TSinkArguments&& args, + TIntrusivePtr counters) + : LogPrefix(TStringBuilder() << "TxId: " << args.TxId << ", task: " << args.TaskId << ". 
") + , Settings(std::move(settings)) + , OutputIndex(args.OutputIndex) + , Callbacks(args.Callback) + , Counters(counters) + , TypeEnv(args.TypeEnv) + , Alloc(args.Alloc) + , TxId(std::get(args.TxId)) + , TableId( + Settings.GetTable().GetOwnerId(), + Settings.GetTable().GetTableId(), + Settings.GetTable().GetVersion()) + { + EgressStats.Level = args.StatsLevel; + } + + void Bootstrap() { + LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; + + WriteTableActor = new TKqpTableWriteActor( + this, + TableId, + Settings.GetTable().GetPath(), + Settings.GetLockTxId(), + Settings.GetLockNodeId(), + Settings.GetInconsistentTx(), + TypeEnv, + Alloc); + + WriteTableActorId = RegisterWithSameMailbox(WriteTableActor); + + TVector columnsMetadata; + columnsMetadata.reserve(Settings.GetColumns().size()); + for (const auto & column : Settings.GetColumns()) { + columnsMetadata.push_back(column); + } + WriteToken = WriteTableActor->Open(GetOperation(Settings.GetType()), std::move(columnsMetadata)); + WaitingForTableActor = true; + } + + static constexpr char ActorName[] = "KQP_DIRECT_WRITE_ACTOR"; + +private: + virtual ~TKqpDirectWriteActor() { + } + + void CommitState(const NYql::NDqProto::TCheckpoint&) final {}; + void LoadState(const NYql::NDq::TSinkState&) final {}; + + ui64 GetOutputIndex() const final { + return OutputIndex; + } + + const NYql::NDq::TDqAsyncStats& GetEgressStats() const final { + return EgressStats; + } + + i64 GetFreeSpace() const final { + return (WriteTableActor && WriteTableActor->IsReady()) + ? MemoryLimit - GetMemory() + : std::numeric_limits::min(); // Can't use zero here because compute can use overcommit! + } + + i64 GetMemory() const { + return (WriteTableActor && WriteTableActor->IsReady()) + ? 
WriteTableActor->GetMemory() + : 0; + } + + TMaybe ExtraData() override { + NKikimrKqp::TEvKqpOutputActorResultInfo resultInfo; + for (const auto& [_, lockInfo] : WriteTableActor->GetLocks()) { + if (const auto lock = lockInfo.GetLock(); lock) { + resultInfo.AddLocks()->CopyFrom(*lock); + } + } + google::protobuf::Any result; + result.PackFrom(resultInfo); + return result; + } + + void SendData(NMiniKQL::TUnboxedValueBatch&& data, i64 size, const TMaybe&, bool finished) final { + YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); + YQL_ENSURE(!Closed); + Closed = finished; + EgressStats.Resume(); + Y_UNUSED(size); + + WriteTableActor->Write(*WriteToken, data); + if (Closed) { + WriteTableActor->Close(*WriteToken); + WriteTableActor->Close(); + } + Process(); + } + + void Process() { + if (GetFreeSpace() <= 0) { + WaitingForTableActor = true; + } else if (WaitingForTableActor && GetFreeSpace() > MemoryLimit / 2) { + ResumeExecution(); + } + + if (Closed || GetFreeSpace() <= 0) { + WriteTableActor->Flush(); + } + + if (Closed && WriteTableActor->IsFinished()) { + CA_LOG_D("Write actor finished"); + Callbacks->OnAsyncOutputFinished(GetOutputIndex()); + } + } + + void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { + NYql::TIssue issue(message); + for (const auto& i : subIssues) { + issue.AddSubIssue(MakeIntrusive(i)); + } + + NYql::TIssues issues; + issues.AddIssue(std::move(issue)); + + Callbacks->OnAsyncOutputError(OutputIndex, std::move(issues), statusCode); + } + + void PassAway() override { + WriteTableActor->Terminate(); + TActorBootstrapped::PassAway(); + } + + void Prepare() { + YQL_ENSURE(SchemeEntry); + ResolveAttempts = 0; + + if (!ShardedWriteController) { + TVector columnsMetadata; + columnsMetadata.reserve(Settings.GetColumns().size()); + for (const auto & column : Settings.GetColumns()) { + columnsMetadata.push_back(column); + } + + try { + ShardedWriteController 
= CreateShardedWriteController( + TShardedWriteControllerSettings { + .MemoryLimitTotal = MessageSettings.InFlightMemoryLimitPerActorBytes, + .MemoryLimitPerMessage = MessageSettings.MemoryLimitPerMessageBytes, + .MaxBatchesPerMessage = (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable + ? 1 + : MessageSettings.MaxBatchesPerMessage), + }, + std::move(columnsMetadata), + TypeEnv, + Alloc); + } catch (...) { + RuntimeError( + CurrentExceptionMessage(), + NYql::NDqProto::StatusIds::INTERNAL_ERROR); + } + } + + try { + if (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { + ShardedWriteController->OnPartitioningChanged(*SchemeEntry); + } else { + ShardedWriteController->OnPartitioningChanged(*SchemeEntry, std::move(*SchemeRequest)); + } + ResumeExecution(); + } catch (...) { + RuntimeError( + CurrentExceptionMessage(), + NYql::NDqProto::StatusIds::INTERNAL_ERROR); + } + ProcessBatches(); + } + + void ResumeExecution() { + CA_LOG_D("Resuming execution."); + WaitingForTableActor = false; + Callbacks->ResumeExecution(); + } + + void OnReady(const TTableId&) override { + Process(); + } + + void OnPrepared(TPreparedInfo&&) override { + } + + void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) override { + Y_UNUSED(shardId, isShardEmpty); + EgressStats.Bytes += dataSize; + EgressStats.Chunks++; + EgressStats.Splits++; + EgressStats.Resume(); + Process(); + } + + void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) override { + RuntimeError(message, statusCode, subIssues); + } + + TString LogPrefix; + const NKikimrKqp::TKqpTableSinkSettings Settings; + TWriteActorSettings MessageSettings; + const ui64 OutputIndex; + NYql::NDq::TDqAsyncStats EgressStats; + NYql::NDq::IDqComputeActorAsyncOutput::ICallbacks * Callbacks = nullptr; + TIntrusivePtr Counters; + const NMiniKQL::TTypeEnvironment& TypeEnv; + std::shared_ptr Alloc; + + const ui64 
TxId; + const TTableId TableId; + TKqpTableWriteActor* WriteTableActor = nullptr; + TActorId WriteTableActorId; + + std::optional WriteToken; + + bool Closed = false; + + bool WaitingForTableActor = false; + const i64 MemoryLimit = kInFlightMemoryLimitPerActor; +}; + + +namespace { + +struct TWriteToken { + TTableId TableId; + ui64 Cookie; + + bool IsEmpty() const { + return !TableId; + } +}; + +struct TTransactionSettings { + ui64 TxId = 0; + ui64 LockTxId = 0; + ui64 LockNodeId = 0; + bool InconsistentTx = false; +}; + +struct TWriteSettings { + TTableId TableId; + TString TablePath; // for error messages + NKikimrDataEvents::TEvWrite::TOperation::EOperationType OperationType; + TVector Columns; + TTransactionSettings TransactionSettings; +}; + +struct TBufferWriteMessage { + TActorId From; + TWriteToken Token; + bool Close = false; + std::shared_ptr> Data; + std::shared_ptr Alloc; +}; + +struct TEvBufferWrite : public TEventLocal { + bool Close = false; + std::optional Token; + std::optional Settings; + std::shared_ptr> Data; + std::shared_ptr Alloc; +}; + +struct TEvBufferWriteResult : public TEventLocal { + TWriteToken Token; +}; + +} + + +class TKqpBufferWriteActor :public TActorBootstrapped, public IKqpWriteBuffer, public IKqpTableWriterCallbacks { + using TBase = TActorBootstrapped; + +public: + enum class EState { + WAITING, // Out of memory, wait for free memory. Can't accept any writes in this state. + WRITING, // Allow to write data to buffer (there is free memory). + FLUSHING, // Force flush (for uncommitted changes visibility). Can't accept any writes in this state. + PREPARING, // Do preparation for commit. All writers are closed. New writes wouldn't be accepted. + COMMITTING, // Do immediate commit (single shard). All writers are closed. New writes wouldn't be accepted. + ROLLINGBACK, // Do rollback. New writes wouldn't be accepted. 
+ FINISHED, + }; + +public: + TKqpBufferWriteActor( + TKqpBufferWriterSettings&& settings) + : SessionActorId(settings.SessionActorId) + , Alloc(std::make_shared(__LOCATION__)) + , TypeEnv(*Alloc) + { + Alloc->Release(); + State = EState::WRITING; + } + + void Bootstrap() { + LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; + Become(&TKqpBufferWriteActor::StateFuncBuf); + } + + static constexpr char ActorName[] = "KQP_BUFFER_WRITE_ACTOR"; + + STFUNC(StateFuncBuf) { + try { + switch (ev->GetTypeRewrite()) { + hFunc(TEvKqpBuffer::TEvTerminate, Handle); + hFunc(TEvBufferWrite, Handle); + default: + AFL_ENSURE(false)("unknown message", ev->GetTypeRewrite()); + } + } catch (const yexception& e) { + ReplyErrorAndDie(e.what(), NYql::NDqProto::StatusIds::INTERNAL_ERROR, {}); + } + } + + void Handle(TEvBufferWrite::TPtr& ev) { + TWriteToken token; + if (!ev->Get()->Token) { + AFL_ENSURE(ev->Get()->Settings); + token = Open(std::move(*ev->Get()->Settings)); + } else { + token = *ev->Get()->Token; + } + + auto& queue = DataQueues[token.TableId]; + queue.emplace_back(); + auto& message = queue.back(); + + message.Token = token; + message.From = ev->Sender; + message.Close = ev->Get()->Close; + message.Data = ev->Get()->Data; + message.Alloc = ev->Get()->Alloc; + + if (HasWrites) { + AFL_ENSURE(LockTxId == ev->Get()->Settings->TransactionSettings.LockTxId); + AFL_ENSURE(LockNodeId == ev->Get()->Settings->TransactionSettings.LockNodeId); + AFL_ENSURE(InconsistentTx == ev->Get()->Settings->TransactionSettings.InconsistentTx); + } else { + LockTxId = ev->Get()->Settings->TransactionSettings.LockTxId; + LockNodeId = ev->Get()->Settings->TransactionSettings.LockNodeId; + InconsistentTx = ev->Get()->Settings->TransactionSettings.InconsistentTx; + HasWrites = true; + } + + ProcessQueue(token.TableId); + } + + void ProcessQueue(const TTableId& tableId) { + auto& queue = DataQueues.at(tableId); + auto& writeInfo = WriteInfos.at(tableId); + + if 
(!writeInfo.WriteTableActor->IsReady()) { + return; + } + + while (!queue.empty()) { + auto& message = queue.front(); + + if (!message.Data->empty()) { + for (const auto& data : *message.Data) { + Write(message.Token, data); + } + } + if (message.Close) { + Close(message.Token); + } + + auto result = std::make_unique(); + result->Token = message.Token; + + Send(message.From, result.release()); + + { + TGuard guard(*message.Alloc); + message.Data = nullptr; + } + queue.pop_front(); + } + + Process(); + } + + TWriteToken Open(TWriteSettings&& settings) { + YQL_ENSURE(State == EState::WRITING || State == EState::WAITING); + + auto& info = WriteInfos[settings.TableId]; + if (!info.WriteTableActor) { + info.WriteTableActor = new TKqpTableWriteActor( + this, + settings.TableId, + settings.TablePath, + LockTxId, + LockNodeId, + InconsistentTx, + TypeEnv, + Alloc); + info.WriteTableActorId = RegisterWithSameMailbox(info.WriteTableActor); + State = EState::WAITING; + } + + auto writeToken = info.WriteTableActor->Open(settings.OperationType, std::move(settings.Columns)); + return {settings.TableId, std::move(writeToken)}; + } + + void Write(TWriteToken token, const NMiniKQL::TUnboxedValueBatch& data) { + YQL_ENSURE(State == EState::WRITING || State == EState::WAITING); + + auto& info = WriteInfos.at(token.TableId); + info.WriteTableActor->Write(token.Cookie, data); + } + + void Close(TWriteToken token) { + YQL_ENSURE(State == EState::WRITING || State == EState::WAITING); + + auto& info = WriteInfos.at(token.TableId); + info.WriteTableActor->Close(token.Cookie); + } + + THashMap GetLocks(TWriteToken token) const { + auto& info = WriteInfos.at(token.TableId); + THashMap result; + for (const auto& [shardId, lockInfo] : info.WriteTableActor->GetLocks()) { + if (const auto lock = lockInfo.GetLock(); lock) { + result.emplace(shardId, *lock); + } + } + return result; + } + + THashMap GetLocks() const override { + THashMap result; + for (const auto& [_, info] : WriteInfos) { + for 
(const auto& [shardId, lockInfo] : info.WriteTableActor->GetLocks()) { + if (const auto lock = lockInfo.GetLock(); lock) { + result.emplace(shardId, *lock); + } + } + } + return result; + } + + void Flush(std::function callback) override { + State = EState::FLUSHING; + OnFlushedCallback = callback; + Close(); + Process(); + } + + void Prepare(std::function callback, TPrepareSettings&& prepareSettings) override { + YQL_ENSURE(State == EState::WRITING); + Y_UNUSED(callback, prepareSettings); + State = EState::PREPARING; + OnPreparedCallback = std::move(callback); + for (auto& [_, info] : WriteInfos) { + info.WriteTableActor->SetPrepare(prepareSettings.TxId); + } + Close(); + Process(); + } + + void OnCommit(std::function callback) override { + YQL_ENSURE(State == EState::PREPARING); + State = EState::COMMITTING; + OnCommitCallback = std::move(callback); + for (auto& [_, info] : WriteInfos) { + info.WriteTableActor->SetCommit(); + } + } + + void ImmediateCommit(std::function callback, ui64 txId) override { + YQL_ENSURE(State == EState::WRITING); + State = EState::COMMITTING; + OnCommitCallback = std::move(callback); + for (auto& [_, info] : WriteInfos) { + info.WriteTableActor->SetImmediateCommit(txId); + } + Close(); + Process(); + } + + void Close() { + for (auto& [_, info] : WriteInfos) { + if (!info.WriteTableActor->IsClosed()) { + info.WriteTableActor->Close(); + } + } + } + + bool IsFinished() const override { + return State == EState::FINISHED; + } + + i64 GetFreeSpace(TWriteToken token) const { + auto& info = WriteInfos.at(token.TableId); + return info.WriteTableActor->IsReady() + ? MemoryLimit - info.WriteTableActor->GetMemory() + : std::numeric_limits::min(); // Can't use zero here because compute can use overcommit! + } + + i64 GetTotalFreeSpace() const { + return MemoryLimit - GetTotalMemory(); + } + + i64 GetTotalMemory() const { + i64 totalMemory = 0; + for (auto& [_, info] : WriteInfos) { + totalMemory += info.WriteTableActor->IsReady() + ? 
info.WriteTableActor->GetMemory() + : 0; + } + return totalMemory; + } + + THashSet GetShardsIds() const override { + THashSet shardIds; + for (auto& [_, info] : WriteInfos) { + for (const auto& id : info.WriteTableActor->GetShardsIds()) { + shardIds.insert(id); + } + } + return shardIds; + } + + void PassAway() override { + for (auto& [_, queue] : DataQueues) { + while (!queue.empty()) { + auto& message = queue.front(); + { + TGuard guard(*message.Alloc); + message.Data = nullptr; + } + queue.pop_front(); + } + } + + for (auto& [_, info] : WriteInfos) { + if (info.WriteTableActor) { + info.WriteTableActor->Terminate(); + } + } + TActorBootstrapped::PassAway(); + } + + void Handle(TEvKqpBuffer::TEvTerminate::TPtr&) { + PassAway(); + } + + TActorId GetActorId() const override { + return SelfId(); + } + + void Process() { + if (GetTotalFreeSpace() <= 0) { + State = EState::WAITING; + } else if (State == EState::WAITING && GetTotalFreeSpace() > MemoryLimit / 2) { + ResumeExecution(); + } + + const bool needToFlush = (State == EState::WAITING + || State == EState::FLUSHING + || State == EState::PREPARING + || State == EState::COMMITTING + || State == EState::ROLLINGBACK); + + if (needToFlush) { + for (auto& [_, info] : WriteInfos) { + if (info.WriteTableActor->IsReady()) { + info.WriteTableActor->Flush(); + } + } + } + + bool isFinished = true; + for (auto& [_, info] : WriteInfos) { + isFinished &= info.WriteTableActor->IsFinished(); + } + if (isFinished) { + CA_LOG_D("Write actor finished"); + switch (State) { + case EState::PREPARING: + //Settings.Callbacks->OnPrepared(); + break; + case EState::COMMITTING: + //Settings.Callbacks->OnCommitted(); + break; + case EState::ROLLINGBACK: + //Settings.Callbacks->OnRolledBack(); + break; + case EState::FLUSHING: + //Settings.Callbacks->OnFlushed(); + //if (OnFlushedCallback != nullptr) { + YQL_ENSURE(OnFlushedCallback != nullptr); + OnFlushedCallback(); + //} + break; + default: + YQL_ENSURE(false); + } + + State = 
EState::FINISHED; + } + } + + void ResumeExecution() { + CA_LOG_D("Resuming execution."); + State = EState::WRITING; + } + + void OnReady(const TTableId& tableId) override { + ProcessQueue(tableId); + } + + void OnPrepared(TPreparedInfo&& preparedInfo) override { + OnPreparedCallback(std::move(preparedInfo)); + Process(); + } + + void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) override { + Y_UNUSED(dataSize); + if (State == EState::COMMITTING && isShardEmpty) { + OnCommitCallback(shardId); + } else { + Process(); + } + } + + void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) override { + ReplyErrorAndDie(message, statusCode, subIssues); + } + + void ReplyErrorAndDie(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) { + CA_LOG_E("Error: " << message << ". statusCode=" << NYql::NDqProto::StatusIds_StatusCode_Name(statusCode) << ". subIssues=" << subIssues.ToString()); + Send(SessionActorId, new TEvKqpBuffer::TEvError{ + message, + statusCode, + subIssues, + }); + PassAway(); + } + +private: + TString LogPrefix; + + const TActorId SessionActorId; + + bool HasWrites = false; + ui64 LockTxId = 0; + ui64 LockNodeId = 0; + bool InconsistentTx = false; + + std::shared_ptr Alloc; + NMiniKQL::TTypeEnvironment TypeEnv; + + struct TWriteInfo { + TKqpTableWriteActor* WriteTableActor = nullptr; + TActorId WriteTableActorId; + + THashMap> ResumeExecutionCallbacks; + }; + + THashMap WriteInfos; + + EState State; + std::function OnFlushedCallback; + std::function OnPreparedCallback; + std::function OnCommitCallback; + + THashMap> DataQueues; + + const i64 MemoryLimit; + + IShardedWriteControllerPtr ShardedWriteController = nullptr; +}; + +class TKqpForwardWriteActor : public TActorBootstrapped, public NYql::NDq::IDqComputeActorAsyncOutput { + using TBase = TActorBootstrapped; + +public: + TKqpForwardWriteActor( + 
NKikimrKqp::TKqpTableSinkSettings&& settings, + NYql::NDq::TDqAsyncIoFactory::TSinkArguments&& args, + TIntrusivePtr counters) + : LogPrefix(TStringBuilder() << "TxId: " << args.TxId << ", task: " << args.TaskId << ". ") + , Settings(std::move(settings)) + , OutputIndex(args.OutputIndex) + , Callbacks(args.Callback) + , Counters(counters) + , TypeEnv(args.TypeEnv) + , Alloc(args.Alloc) + , BufferActorId(ActorIdFromProto(Settings.GetBufferActorId())) + , TxId(std::get(args.TxId)) + , TableId( + Settings.GetTable().GetOwnerId(), + Settings.GetTable().GetTableId(), + Settings.GetTable().GetVersion()) + { + EgressStats.Level = args.StatsLevel; + } + + void Bootstrap() { + LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; + Become(&TKqpForwardWriteActor::StateFuncFwd); + } + + static constexpr char ActorName[] = "KQP_FORWARD_WRITE_ACTOR"; + +private: + STFUNC(StateFuncFwd) { + try { + switch (ev->GetTypeRewrite()) { + hFunc(TEvBufferWriteResult, Handle); + default: + AFL_ENSURE(false)("unknown message", ev->GetTypeRewrite()); + } + } catch (const yexception& e) { + RuntimeError(e.what(), NYql::NDqProto::StatusIds::INTERNAL_ERROR); + } + } + + void Handle(TEvBufferWriteResult::TPtr& result) { + WriteToken = result->Get()->Token; + DataSize = 0; + { + auto alloc = TypeEnv.BindAllocator(); + Data = nullptr; + } + + if (Closed) { + Callbacks->OnAsyncOutputFinished(GetOutputIndex()); + } + Callbacks->ResumeExecution(); + } + + void WriteToBuffer() { + auto ev = std::make_unique(); + + ev->Data = Data; + ev->Close = Closed; + ev->Alloc = Alloc; + + if (!WriteToken.IsEmpty()) { + ev->Token = WriteToken; + } else { + TVector columnsMetadata; + columnsMetadata.reserve(Settings.GetColumns().size()); + for (const auto & column : Settings.GetColumns()) { + columnsMetadata.push_back(column); + } + + ev->Settings = TWriteSettings{ + .TableId = TableId, + .TablePath = Settings.GetTable().GetPath(), + .OperationType = GetOperation(Settings.GetType()), + 
.Columns = std::move(columnsMetadata), + .TransactionSettings = TTransactionSettings{ + .TxId = TxId, + .LockTxId = Settings.GetLockTxId(), + .LockNodeId = Settings.GetLockNodeId(), + .InconsistentTx = Settings.GetInconsistentTx(), + }, + }; + } + + AFL_ENSURE(Send(BufferActorId, ev.release())); + EgressStats.Bytes += DataSize; + EgressStats.Chunks++; + EgressStats.Splits++; + EgressStats.Resume(); + } + + void CommitState(const NYql::NDqProto::TCheckpoint&) final {}; + void LoadState(const NYql::NDq::TSinkState&) final {}; + + ui64 GetOutputIndex() const final { + return OutputIndex; + } + + const NYql::NDq::TDqAsyncStats& GetEgressStats() const final { + return EgressStats; + } + + i64 GetFreeSpace() const final { + return kMaxForwardedSize - DataSize > 0 + ? kMaxForwardedSize - DataSize + : std::numeric_limits::min(); + } + + TMaybe ExtraData() override { + return {}; + } + + void SendData(NMiniKQL::TUnboxedValueBatch&& data, i64 size, const TMaybe&, bool finished) final { + YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); + Closed |= finished; + if (!Data) { + Data = std::make_shared>(); + } + + Data->emplace_back(std::move(data)); + DataSize += size; + + if (Closed || GetFreeSpace() <= 0) { + WriteToBuffer(); + } + } + + void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { + NYql::TIssue issue(message); + for (const auto& i : subIssues) { + issue.AddSubIssue(MakeIntrusive(i)); + } + + NYql::TIssues issues; + issues.AddIssue(std::move(issue)); + + Callbacks->OnAsyncOutputError(OutputIndex, std::move(issues), statusCode); + } + + ~TKqpForwardWriteActor() { + { + TGuard guard(*Alloc); + Data = nullptr; + } + } + + void PassAway() override { + TActorBootstrapped::PassAway(); + } + + TString LogPrefix; + const NKikimrKqp::TKqpTableSinkSettings Settings; + const ui64 OutputIndex; + NYql::NDq::TDqAsyncStats EgressStats; + NYql::NDq::IDqComputeActorAsyncOutput::ICallbacks * 
Callbacks = nullptr; + TIntrusivePtr Counters; + const NMiniKQL::TTypeEnvironment& TypeEnv; + std::shared_ptr Alloc; + + TActorId BufferActorId; + + std::shared_ptr> Data; + i64 DataSize = 0; + bool Closed = false; + + const ui64 TxId; + const TTableId TableId; + + TWriteToken WriteToken; +}; + +std::pair CreateKqpBufferWriterActor(TKqpBufferWriterSettings&& settings) { + auto* actor = new TKqpBufferWriteActor(std::move(settings)); + return std::make_pair(actor, actor); +} + + +void RegisterKqpWriteActor(NYql::NDq::TDqAsyncIoFactory& factory, TIntrusivePtr counters) { + factory.RegisterSink( + TString(NYql::KqpTableSinkName), + [counters] (NKikimrKqp::TKqpTableSinkSettings&& settings, NYql::NDq::TDqAsyncIoFactory::TSinkArguments&& args) { + if (!ActorIdFromProto(settings.GetBufferActorId())) { + auto* actor = new TKqpDirectWriteActor(std::move(settings), std::move(args), counters); + return std::make_pair(actor, actor); + } else { + auto* actor = new TKqpForwardWriteActor(std::move(settings), std::move(args), counters); + return std::make_pair(actor, actor); + } }); } diff --git a/ydb/core/kqp/runtime/kqp_write_actor.h b/ydb/core/kqp/runtime/kqp_write_actor.h index 844309a70a77..a81284d1efdc 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.h +++ b/ydb/core/kqp/runtime/kqp_write_actor.h @@ -1,11 +1,44 @@ #pragma once #include +#include +#include +#include #include +#include namespace NKikimr { namespace NKqp { + +// TODO: move somewhere else +class IKqpWriteBuffer { +public: + virtual ~IKqpWriteBuffer() = default; + + // Only when all writes are closed! 
+ virtual void Flush(std::function callback) = 0; + //virtual void Flush(TTableId tableId) = 0; + + virtual void Prepare(std::function callback, TPrepareSettings&& prepareSettings) = 0; + virtual void OnCommit(std::function callback) = 0; + virtual void ImmediateCommit(std::function callback, ui64 txId) = 0; + //virtual void Rollback(std::function callback) = 0; + + virtual THashSet GetShardsIds() const = 0; + virtual THashMap GetLocks() const = 0; + + virtual bool IsFinished() const = 0; + + virtual TActorId GetActorId() const = 0; +}; + +struct TKqpBufferWriterSettings { + TActorId SessionActorId; +}; + +std::pair CreateKqpBufferWriterActor(TKqpBufferWriterSettings&& settings); + void RegisterKqpWriteActor(NYql::NDq::TDqAsyncIoFactory&, TIntrusivePtr); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index ebe0fdc64df4..6c8cc5a7c0f3 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -32,7 +32,7 @@ class IPayloadSerializer : public TThrRefBase { using IBatchPtr = TIntrusivePtr; - virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; + virtual void AddData(const NMiniKQL::TUnboxedValueBatch& data) = 0; virtual void AddBatch(const IBatchPtr& batch) = 0; virtual void Close() = 0; @@ -394,7 +394,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { Sharding = shardingConclusion.DetachResult(); } - void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { + void AddData(const NMiniKQL::TUnboxedValueBatch& data) override { YQL_ENSURE(!Closed); if (data.empty()) { return; @@ -433,14 +433,13 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { const i64 shardBatchMemory = NArrow::GetBatchDataSize(shardBatch); YQL_ENSURE(shardBatchMemory != 0); + ShardIds.insert(shardId); auto& unpreparedBatch = UnpreparedBatches[shardId]; unpreparedBatch.TotalDataSize += shardBatchMemory; unpreparedBatch.Batches.emplace_back(shardBatch); 
Memory += shardBatchMemory; FlushUnpreparedBatch(shardId, unpreparedBatch, force); - - ShardIds.insert(shardId); } } @@ -682,10 +681,10 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { public: TDataShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, + const NSchemeCache::TSchemeCacheRequest::TEntry& partitionsEntry, const TConstArrayRef inputColumns) : SchemeEntry(schemeEntry) - , KeyDescription(std::move(partitionsEntry.KeyDescription)) + , KeyDescription(partitionsEntry.KeyDescription) , Columns(BuildColumns(inputColumns)) , WriteIndex(BuildWriteIndexKeyFirst(SchemeEntry, inputColumns)) , WriteColumnIds(BuildWriteColumnIds(inputColumns, WriteIndex)) @@ -716,7 +715,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { ShardIds.insert(shardIter->ShardId); } - void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { + void AddData(const NMiniKQL::TUnboxedValueBatch& data) override { YQL_ENSURE(!Closed); TRowBuilder rowBuilder(Columns.size()); @@ -821,7 +820,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { } const NSchemeCache::TSchemeCacheNavigate::TEntry SchemeEntry; - THolder KeyDescription; + const THolder& KeyDescription; const TVector Columns; const std::vector WriteIndex; @@ -836,8 +835,6 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { bool Closed = false; }; -} - bool IPayloadSerializer::IBatch::IsEmpty() const { return GetMemory() == 0; } @@ -851,14 +848,27 @@ IPayloadSerializerPtr CreateColumnShardPayloadSerializer( IPayloadSerializerPtr CreateDataShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, + const NSchemeCache::TSchemeCacheRequest::TEntry& partitionsEntry, const TConstArrayRef inputColumns) { return MakeIntrusive( - schemeEntry, std::move(partitionsEntry), inputColumns); + 
schemeEntry, partitionsEntry, inputColumns); +} + } namespace { +struct TMetadata { + const TTableId TableId; + const NKikimrDataEvents::TEvWrite::TOperation::EOperationType OperationType; + const TVector InputColumnsMetadata; +}; + +struct TBatchWithMetadata { + IShardedWriteController::TWriteToken Token; + IPayloadSerializer::IBatchPtr Data; +}; + class TShardsInfo { public: class TShardInfo { @@ -892,14 +902,14 @@ class TShardsInfo { i64 dataSize = 0; while (BatchesInFlight < maxCount && BatchesInFlight < Batches.size() - && dataSize + GetBatch(BatchesInFlight)->GetMemory() <= maxDataSize) { - dataSize += GetBatch(BatchesInFlight)->GetMemory(); + && dataSize + GetBatch(BatchesInFlight).Data->GetMemory() <= maxDataSize) { + dataSize += GetBatch(BatchesInFlight).Data->GetMemory(); ++BatchesInFlight; } - YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight)->GetMemory() <= maxDataSize); + YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).Data->GetMemory() <= maxDataSize); } - const IPayloadSerializer::IBatchPtr& GetBatch(size_t index) const { + const TBatchWithMetadata& GetBatch(size_t index) const { return Batches.at(index); } @@ -907,7 +917,7 @@ class TShardsInfo { if (BatchesInFlight != 0 && Cookie == cookie) { ui64 dataSize = 0; for (size_t index = 0; index < BatchesInFlight; ++index) { - dataSize += Batches.front()->GetMemory(); + dataSize += Batches.front().Data->GetMemory(); Batches.pop_front(); } @@ -921,10 +931,10 @@ class TShardsInfo { return std::nullopt; } - void PushBatch(IPayloadSerializer::IBatchPtr&& batch) { + void PushBatch(TBatchWithMetadata&& batch) { YQL_ENSURE(!IsClosed()); Batches.emplace_back(std::move(batch)); - Memory += Batches.back()->GetMemory(); + Memory += Batches.back().Data->GetMemory(); } ui64 GetCookie() const { @@ -948,7 +958,7 @@ class TShardsInfo { } private: - std::deque Batches; + std::deque Batches; i64& Memory; ui64& NextCookie; @@ -1006,6 +1016,10 @@ class TShardsInfo { return 
ShardsInfo; } + const THashMap& GetShards() const { + return ShardsInfo; + } + i64 GetMemory() const { return Memory; } @@ -1029,63 +1043,115 @@ class TShardsInfo { class TShardedWriteController : public IShardedWriteController { public: - void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) override { + void OnPartitioningChanged(NSchemeCache::TSchemeCacheNavigate::TEntry&& schemeEntry) override { + SchemeEntry = std::move(schemeEntry); BeforePartitioningChanged(); - Serializer = CreateColumnShardPayloadSerializer( - schemeEntry, - InputColumnsMetadata); + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + auto& writeInfo = WriteInfos.at(token); + writeInfo.Serializer = CreateColumnShardPayloadSerializer( + *SchemeEntry, + writeInfo.Metadata.InputColumnsMetadata); + } AfterPartitioningChanged(); } void OnPartitioningChanged( - const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, + NSchemeCache::TSchemeCacheNavigate::TEntry&& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) override { + SchemeEntry = std::move(schemeEntry); + PartitionsEntry = std::move(partitionsEntry); BeforePartitioningChanged(); - Serializer = CreateDataShardPayloadSerializer( - schemeEntry, - std::move(partitionsEntry), - InputColumnsMetadata); + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + auto& writeInfo = WriteInfos.at(token); + writeInfo.Serializer = CreateDataShardPayloadSerializer( + *SchemeEntry, + *PartitionsEntry, + writeInfo.Metadata.InputColumnsMetadata); + } AfterPartitioningChanged(); } void BeforePartitioningChanged() { - if (Serializer) { - if (!Closed) { - Serializer->Close(); + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + auto& writeInfo = WriteInfos.at(token); + if (writeInfo.Serializer) { + if (!writeInfo.Closed) { + writeInfo.Serializer->Close(); + } + FlushSerializer(token, true); + writeInfo.Serializer = nullptr; } - FlushSerializer(true); 
} } void AfterPartitioningChanged() { - ShardsInfo.Close(); - ReshardData(); - ShardsInfo.Clear(); - if (Closed) { - Close(); - } else { - FlushSerializer(GetMemory() >= Settings.MemoryLimitTotal); + if (!WriteInfos.empty()) { + ShardsInfo.Close(); + ReshardData(); + ShardsInfo.Clear(); + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + const auto& writeInfo = WriteInfos.at(token); + if (writeInfo.Closed) { + Close(token); + } else { + FlushSerializer(token, GetMemory() >= Settings.MemoryLimitTotal); + } + } } } - void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { + TWriteToken Open( + const TTableId tableId, + const NKikimrDataEvents::TEvWrite::TOperation::EOperationType operationType, + TVector&& inputColumns) override { + auto token = CurrentWriteToken++; + auto iter = WriteInfos.emplace( + token, + TWriteInfo { + .Metadata = TMetadata { + .TableId = tableId, + .OperationType = operationType, + .InputColumnsMetadata = std::move(inputColumns), + }, + .Serializer = nullptr, + .Closed = false, + }).first; + if (PartitionsEntry) { + iter->second.Serializer = CreateDataShardPayloadSerializer( + *SchemeEntry, + *PartitionsEntry, + iter->second.Metadata.InputColumnsMetadata); + } else if (SchemeEntry) { + iter->second.Serializer = CreateColumnShardPayloadSerializer( + *SchemeEntry, + iter->second.Metadata.InputColumnsMetadata); + } + return token; + } + + void Write(TWriteToken token, const NMiniKQL::TUnboxedValueBatch& data) override { YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); - YQL_ENSURE(!Closed); + auto& info = WriteInfos.at(token); + YQL_ENSURE(!info.Closed); auto allocGuard = TypeEnv.BindAllocator(); - YQL_ENSURE(Serializer); - Serializer->AddData(std::move(data)); + YQL_ENSURE(info.Serializer); + info.Serializer->AddData(data); - FlushSerializer(GetMemory() >= Settings.MemoryLimitTotal); + FlushSerializer(token, GetMemory() >= Settings.MemoryLimitTotal); } - void Close() override { + void Close(TWriteToken 
token) override { auto allocGuard = TypeEnv.BindAllocator(); - YQL_ENSURE(Serializer); - Closed = true; - Serializer->Close(); - FlushSerializer(true); - YQL_ENSURE(Serializer->IsFinished()); + auto& info = WriteInfos.at(token); + YQL_ENSURE(info.Serializer); + info.Closed = true; + info.Serializer->Close(); + FlushSerializer(token, true); + YQL_ENSURE(info.Serializer->IsFinished()); + } + + void Close() override { ShardsInfo.Close(); } @@ -1093,6 +1159,15 @@ class TShardedWriteController : public IShardedWriteController { return ShardsInfo.GetPendingShards(); } + TVector GetShardsIds() const override { + TVector result; + result.reserve(ShardsInfo.GetShards().size()); + for (const auto& [id, _] : ShardsInfo.GetShards()) { + result.push_back(id); + } + return result; + } + std::optional GetMessageMetadata(ui64 shardId) override { auto& shardInfo = ShardsInfo.GetShard(shardId); if (shardInfo.IsEmpty()) { @@ -1103,7 +1178,8 @@ class TShardedWriteController : public IShardedWriteController { TMessageMetadata meta; meta.Cookie = shardInfo.GetCookie(); meta.OperationsCount = shardInfo.GetBatchesInFlight(); - meta.IsFinal = shardInfo.IsClosed() && shardInfo.Size() == shardInfo.GetBatchesInFlight(); + meta.IsLast = shardInfo.Size() == shardInfo.GetBatchesInFlight(); + meta.IsFinal = shardInfo.IsClosed() && meta.IsLast; meta.SendAttempts = shardInfo.GetSendAttempts(); return meta; @@ -1119,29 +1195,32 @@ class TShardedWriteController : public IShardedWriteController { for (size_t index = 0; index < shardInfo.GetBatchesInFlight(); ++index) { const auto& inFlightBatch = shardInfo.GetBatch(index); - YQL_ENSURE(!inFlightBatch->IsEmpty()); - result.TotalDataSize += inFlightBatch->GetMemory(); + YQL_ENSURE(!inFlightBatch.Data->IsEmpty()); + result.TotalDataSize += inFlightBatch.Data->GetMemory(); const ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(evWrite) - .AddDataToPayload(inFlightBatch->SerializeToString()); - result.PayloadIndexes.push_back(payloadIndex); + 
.AddDataToPayload(inFlightBatch.Data->SerializeToString()); + evWrite.AddOperation( + WriteInfos.at(inFlightBatch.Token).Metadata.OperationType, + WriteInfos.at(inFlightBatch.Token).Metadata.TableId, + WriteInfos.at(inFlightBatch.Token).Serializer->GetWriteColumnIds(), + payloadIndex, + WriteInfos.at(inFlightBatch.Token).Serializer->GetDataFormat()); } return result; } - NKikimrDataEvents::EDataFormat GetDataFormat() override { - return Serializer->GetDataFormat(); - } - - std::vector GetWriteColumnIds() override { - return Serializer->GetWriteColumnIds(); - } - - std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { + std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { auto allocGuard = TypeEnv.BindAllocator(); auto& shardInfo = ShardsInfo.GetShard(shardId); const auto removedDataSize = shardInfo.PopBatches(cookie); - return removedDataSize; + if (removedDataSize) { + return TMessageAcknowledgedResult { + .DataSize = *removedDataSize, + .IsShardEmpty = shardInfo.IsEmpty(), + }; + } + return std::nullopt; } void OnMessageSent(ui64 shardId, ui64 cookie) override { @@ -1161,29 +1240,56 @@ class TShardedWriteController : public IShardedWriteController { } i64 GetMemory() const override { - YQL_ENSURE(Serializer); - return Serializer->GetMemory() + ShardsInfo.GetMemory(); + i64 total = ShardsInfo.GetMemory(); + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + const auto& writeInfo = WriteInfos.at(token); + if (writeInfo.Serializer) { + total += writeInfo.Serializer->GetMemory(); + } else { + Y_ABORT_UNLESS(writeInfo.Closed); + } + } + return total; } - bool IsClosed() const override { - return Closed; + bool IsAllWritesClosed() const override { + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + if (!WriteInfos.at(token).Closed) { + return false; + } + } + return true; } - bool IsFinished() const override { - return IsClosed() && Serializer->IsFinished() && ShardsInfo.IsFinished(); + bool 
IsAllWritesFinished() const override { + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + const auto& writeInfo = WriteInfos.at(token); + if (!writeInfo.Closed || !writeInfo.Serializer->IsFinished()) { + return false; + } + } + return ShardsInfo.IsFinished(); } bool IsReady() const override { - return Serializer != nullptr; + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + const auto& writeInfo = WriteInfos.at(token); + if (!writeInfo.Serializer && !writeInfo.Closed) { + return false; + } + } + return true; + } + + ui64 GetShardsCount() const override { + return ShardsInfo.GetShards().size(); } TShardedWriteController( const TShardedWriteControllerSettings settings, - TVector&& inputColumnsMetadata, const NMiniKQL::TTypeEnvironment& typeEnv, std::shared_ptr alloc) : Settings(settings) - , InputColumnsMetadata(std::move(inputColumnsMetadata)) , TypeEnv(typeEnv) , Alloc(alloc) { } @@ -1192,26 +1298,36 @@ class TShardedWriteController : public IShardedWriteController { Y_ABORT_UNLESS(Alloc); TGuard allocGuard(*Alloc); ShardsInfo.Clear(); - Serializer = nullptr; + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + WriteInfos.at(token).Serializer = nullptr; + } } private: - void FlushSerializer(bool force) { + void FlushSerializer(TWriteToken token, bool force) { if (force) { - for (auto& [shardId, batches] : Serializer->FlushBatchesForce()) { + const auto& writeInfo = WriteInfos.at(token); + for (auto& [shardId, batches] : writeInfo.Serializer->FlushBatchesForce()) { for (auto& batch : batches) { - ShardsInfo.GetShard(shardId).PushBatch(std::move(batch)); + ShardsInfo.GetShard(shardId).PushBatch(TBatchWithMetadata{ + .Token = token, + .Data = std::move(batch), + }); } } } else { - for (const ui64 shardId : Serializer->GetShardIds()) { + const auto& writeInfo = WriteInfos.at(token); + for (const ui64 shardId : writeInfo.Serializer->GetShardIds()) { auto& shard = ShardsInfo.GetShard(shardId); while (true) { - auto 
batch = Serializer->FlushBatch(shardId); + auto batch = writeInfo.Serializer->FlushBatch(shardId); if (!batch || batch->IsEmpty()) { break; } - shard.PushBatch(std::move(batch)); + shard.PushBatch(TBatchWithMetadata{ + .Token = token, + .Data = std::move(batch), + }); } } } @@ -1228,20 +1344,30 @@ class TShardedWriteController : public IShardedWriteController { void ReshardData() { for (auto& [_, shardInfo] : ShardsInfo.GetShards()) { for (size_t index = 0; index < shardInfo.Size(); ++index) { - Serializer->AddBatch(shardInfo.GetBatch(index)); + const auto& batch = shardInfo.GetBatch(index); + const auto& writeInfo = WriteInfos.at(batch.Token); + writeInfo.Serializer->AddBatch(batch.Data); } } } TShardedWriteControllerSettings Settings; - TVector InputColumnsMetadata; const NMiniKQL::TTypeEnvironment& TypeEnv; std::shared_ptr Alloc; + struct TWriteInfo { + TMetadata Metadata; + IPayloadSerializerPtr Serializer = nullptr; + bool Closed = false; + }; + + THashMap WriteInfos; + TWriteToken CurrentWriteToken = 0; + TShardsInfo ShardsInfo; - bool Closed = false; - IPayloadSerializerPtr Serializer = nullptr; + std::optional SchemeEntry; + std::optional PartitionsEntry; }; } @@ -1249,11 +1375,10 @@ class TShardedWriteController : public IShardedWriteController { IShardedWriteControllerPtr CreateShardedWriteController( const TShardedWriteControllerSettings& settings, - TVector&& inputColumns, const NMiniKQL::TTypeEnvironment& typeEnv, std::shared_ptr alloc) { return MakeIntrusive( - settings, std::move(inputColumns), typeEnv, alloc); + settings, typeEnv, alloc); } } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 46e5ac4f7308..aadadaac51a0 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -12,19 +12,34 @@ namespace NKqp { class IShardedWriteController : public TThrRefBase { public: - virtual void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& 
schemeEntry) = 0; virtual void OnPartitioningChanged( - const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, + NSchemeCache::TSchemeCacheNavigate::TEntry&& schemeEntry) = 0; + virtual void OnPartitioningChanged( + NSchemeCache::TSchemeCacheNavigate::TEntry&& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) = 0; - virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; + using TWriteToken = ui64; + + // Data ordering invariant: + // For two writes A and B: + // A happened before B <=> Close(A) happened before Open(B). + + virtual TWriteToken Open( + const TTableId TableId, + const NKikimrDataEvents::TEvWrite::TOperation::EOperationType operationType, + TVector&& inputColumns) = 0; + virtual void Write(TWriteToken token, const NMiniKQL::TUnboxedValueBatch& data) = 0; + virtual void Close(TWriteToken token) = 0; virtual void Close() = 0; virtual TVector GetPendingShards() const = 0; + virtual ui64 GetShardsCount() const = 0; + virtual TVector GetShardsIds() const = 0; struct TMessageMetadata { ui64 Cookie = 0; ui64 OperationsCount = 0; + bool IsLast = false; bool IsFinal = false; ui64 SendAttempts = 0; }; @@ -36,18 +51,21 @@ class IShardedWriteController : public TThrRefBase { }; virtual TSerializationResult SerializeMessageToPayload(ui64 shardId, NKikimr::NEvents::TDataEvents::TEvWrite& evWrite) = 0; - virtual NKikimrDataEvents::EDataFormat GetDataFormat() = 0; - virtual std::vector GetWriteColumnIds() = 0; - virtual std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; + struct TMessageAcknowledgedResult { + ui64 DataSize = 0; + bool IsShardEmpty = 0; + }; + + virtual std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; virtual void OnMessageSent(ui64 shardId, ui64 cookie) = 0; virtual void ResetRetries(ui64 shardId, ui64 cookie) = 0; virtual i64 GetMemory() const = 0; - virtual bool IsClosed() const = 0; - virtual bool IsFinished() const = 0; + virtual bool IsAllWritesClosed() const = 0; + virtual 
bool IsAllWritesFinished() const = 0; virtual bool IsReady() const = 0; }; @@ -63,7 +81,6 @@ struct TShardedWriteControllerSettings { IShardedWriteControllerPtr CreateShardedWriteController( const TShardedWriteControllerSettings& settings, - TVector&& inputColumns, const NMiniKQL::TTypeEnvironment& typeEnv, std::shared_ptr alloc); diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index b2c4298b13d9..0deb5b7c82d5 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -348,7 +348,7 @@ class TKqpQueryState : public TNonCopyable { } if (HasTxSinkInTx(tx)) { - // At current time transactional internal sinks require separate tnx with commit. + // Sink results can't be committed in transaction. return false; } diff --git a/ydb/core/protos/kqp.proto b/ydb/core/protos/kqp.proto index 890c38391d5a..de99d91aafc2 100644 --- a/ydb/core/protos/kqp.proto +++ b/ydb/core/protos/kqp.proto @@ -696,11 +696,6 @@ message TEvKqpOutputActorResultInfo { repeated NKikimrDataEvents.TLock Locks = 1; } -message TKqpTableSinkLocks { - repeated NKikimrDataEvents.TLock Locks = 1; - repeated uint64 SendingShards = 2; - repeated uint64 ReceivingShards = 3; -} message TKqpTableSinkSettings { enum EType { @@ -716,11 +711,9 @@ message TKqpTableSinkSettings { repeated TKqpColumnMetadataProto Columns = 5; optional uint64 LockTxId = 6; optional uint64 LockNodeId = 7; - optional bool FinalTx = 8; // If tx is immediate then commit, otherwise send prepare - optional bool ImmediateTx = 9; // Try to be immediate tx - optional bool InconsistentTx = 10; // Write each batch in it's own single immediate tx - optional EType Type = 11; - optional TKqpTableSinkLocks Locks = 12; + optional bool InconsistentTx = 8; // Write each batch in it's own single immediate tx + optional EType Type = 9; + optional NActorsProto.TActorId BufferActorId = 10; } message TKqpStreamLookupSettings { From 
26030c900c29fe799401843f75991f50bd40d444 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 8 Sep 2024 19:20:16 +0300 Subject: [PATCH 02/69] fix --- ydb/core/kqp/runtime/ya.make | 9 +++++---- ydb/core/kqp/session_actor/kqp_query_state.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ydb/core/kqp/runtime/ya.make b/ydb/core/kqp/runtime/ya.make index 4fc296d934ca..536aedd23989 100644 --- a/ydb/core/kqp/runtime/ya.make +++ b/ydb/core/kqp/runtime/ya.make @@ -30,23 +30,24 @@ SRCS( PEERDIR( contrib/libs/apache/arrow + library/cpp/threading/hot_swap ydb/core/actorlib_impl ydb/core/base ydb/core/engine ydb/core/engine/minikql ydb/core/formats ydb/core/kqp/common + ydb/core/kqp/common/buffer ydb/core/protos ydb/core/scheme ydb/core/ydb_convert - ydb/library/yql/minikql/computation/llvm14 - ydb/library/yql/minikql/comp_nodes - ydb/library/yql/utils ydb/library/yql/dq/actors/protos ydb/library/yql/dq/actors/spilling ydb/library/yql/dq/common ydb/library/yql/dq/runtime - library/cpp/threading/hot_swap + ydb/library/yql/minikql/comp_nodes + ydb/library/yql/minikql/computation/llvm14 + ydb/library/yql/utils ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index 0deb5b7c82d5..c4fc7e80ae69 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -348,7 +348,7 @@ class TKqpQueryState : public TNonCopyable { } if (HasTxSinkInTx(tx)) { - // Sink results can't be committed in transaction. 
+ // Sink results can't be committed with changes return false; } From b0f705a22183f9ea1e539f9f82d71817c64c2019 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 8 Sep 2024 23:33:06 +0300 Subject: [PATCH 03/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 113 +++++++++++++---------- 1 file changed, 65 insertions(+), 48 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 73d7b6df1205..ac4d6b2a5e83 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -104,9 +104,10 @@ struct IKqpTableWriterCallbacks { virtual void OnReady(const TTableId& tableId) = 0; - virtual void OnPrepared(TPreparedInfo&& preparedInfo) = 0; + // TODO: also track memory here + virtual void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) = 0; - //virtual void OnCommitted(ui64 shardId) = 0; + virtual void OnCommitted(ui64 shardId, ui64 dataSize) = 0; virtual void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) = 0; @@ -456,14 +457,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { return; } case NKikimrDataEvents::TEvWriteResult::STATUS_PREPARED: { - const auto& result = ev->Get()->Record; - TPreparedInfo preparedInfo; - preparedInfo.ShardId = result.GetOrigin(); - preparedInfo.MinStep = result.GetMinStep(); - preparedInfo.MaxStep = result.GetMaxStep(); - preparedInfo.Coordinators = TVector(result.GetDomainCoordinators().begin(), - result.GetDomainCoordinators().end()); - Callbacks->OnPrepared(std::move(preparedInfo)); + ProcessWritePreparedShard(ev); return; } case NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED: { @@ -602,6 +596,22 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } + void ProcessWritePreparedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { + YQL_ENSURE(Mode == EMode::PREPARE); + const auto& record = ev->Get()->Record; + TPreparedInfo preparedInfo; + preparedInfo.ShardId = 
record.GetOrigin(); + preparedInfo.MinStep = record.GetMinStep(); + preparedInfo.MaxStep = record.GetMaxStep(); + preparedInfo.Coordinators = TVector(record.GetDomainCoordinators().begin(), + record.GetDomainCoordinators().end()); + const auto result = ShardedWriteController->OnMessageAcknowledged( + ev->Get()->Record.GetOrigin(), ev->Cookie); + if (result) { + Callbacks->OnPrepared(std::move(preparedInfo), result->DataSize); + } + } + void ProcessWriteCompletedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() << ", TabletId=" << ev->Get()->Record.GetOrigin() @@ -625,7 +635,11 @@ class TKqpTableWriteActor : public TActorBootstrapped { } if (Mode == EMode::COMMIT) { - Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), 0, true); + const auto result = ShardedWriteController->OnMessageAcknowledged( + ev->Get()->Record.GetOrigin(), ev->Cookie); + if (result) { + Callbacks->OnCommitted(ev->Get()->Record.GetOrigin(), result->DataSize); + } } else { const auto result = ShardedWriteController->OnMessageAcknowledged( ev->Get()->Record.GetOrigin(), ev->Cookie); @@ -638,43 +652,34 @@ class TKqpTableWriteActor : public TActorBootstrapped { void SetPrepare(ui64 txId) { Mode = EMode::PREPARE; TxId = txId; - for (const auto shardId : ShardedWriteController->GetShardsIds()) { + + // TODO: move to ShardedWriteController + /*for (const auto shardId : ShardedWriteController->GetShardsIds()) { const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); if (!metadata || (metadata->IsLast && metadata->SendAttempts != 0)) { SendEmptyFinalToShard(shardId); } - } + }*/ } void SetCommit() { Mode = EMode::COMMIT; + // TODO: ShardedWriteController for empty } void SetImmediateCommit(ui64 txId) { Mode = EMode::IMMEDIATE_COMMIT; TxId = txId; - // TODO: send data for empty + // TODO: ShardedWriteController for empty } void Flush() { - //Mode = EMode::FLUSH; + Mode = EMode::FLUSH; for 
(const size_t shardId : ShardedWriteController->GetPendingShards()) { SendDataToShard(shardId); } } - void SendEmptyFinalToShard(const ui64 shardId) { - auto evWrite = std::make_unique( - NKikimrDataEvents::TEvWrite::MODE_PREPARE); - evWrite->SetTxId(TxId); - evWrite->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - Send( - PipeCacheId, - new TEvPipeCache::TEvForward(evWrite.release(), shardId, true), - 0, - 0); - } - void SendDataToShard(const ui64 shardId) { const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); YQL_ENSURE(metadata); @@ -701,26 +706,28 @@ class TKqpTableWriteActor : public TActorBootstrapped { ? NKikimrDataEvents::TEvWrite::MODE_PREPARE : NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); - if (Closed && isImmediateCommit) { - evWrite->Record.SetTxId(TxId); + if (isImmediateCommit) { const auto lock = LocksManager.GetLock(shardId); - // multi immediate evwrite - auto* locks = evWrite->Record.MutableLocks(); - locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - //locks->AddSendingShards(shardId); // TODO: other shards - //locks->AddReceivingShards(shardId); if (lock) { - *locks->AddLocks() = *lock; + evWrite->Record.SetTxId(TxId); + auto* locks = evWrite->Record.MutableLocks(); + locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + locks->AddSendingShards(shardId); + locks->AddReceivingShards(shardId); + if (lock) { + *locks->AddLocks() = *lock; + } } - } else if (Closed && isPrepare) { + } else if (isPrepare) { evWrite->Record.SetTxId(TxId); - // NOT TRUE:: // Last immediate write (only for datashard) - const auto lock = LocksManager.GetLock(shardId); - // multi immediate evwrite auto* locks = evWrite->Record.MutableLocks(); locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - //locks->AddSendingShards(shardId); // TODO: other shards + // TODO: other shards from prepareInfo + locks->AddSendingShards(shardId); locks->AddReceivingShards(shardId); + + // TODO: multi locks (for tablestore support) + const auto 
lock = LocksManager.GetLock(shardId); if (lock) { *locks->AddLocks() = *lock; } @@ -1056,7 +1063,12 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu Process(); } - void OnPrepared(TPreparedInfo&&) override { + void OnPrepared(TPreparedInfo&&, ui64) override { + AFL_ENSURE(false); + } + + void OnCommitted(ui64, ui64) override { + AFL_ENSURE(false); } void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) override { @@ -1488,18 +1500,23 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub ProcessQueue(tableId); } - void OnPrepared(TPreparedInfo&& preparedInfo) override { + void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) override { + AFL_ENSURE(State == EState::PREPARING); + Y_UNUSED(dataSize); OnPreparedCallback(std::move(preparedInfo)); Process(); } - void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) override { + void OnCommitted(ui64 shardId, ui64 dataSize) override { + AFL_ENSURE(State == EState::COMMITTING); Y_UNUSED(dataSize); - if (State == EState::COMMITTING && isShardEmpty) { - OnCommitCallback(shardId); - } else { - Process(); - } + OnCommitCallback(shardId); + Process(); + } + + void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) override { + Y_UNUSED(shardId, dataSize, isShardEmpty); + Process(); } void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) override { From 0ad8caaf432d34f38a13d25279751b45fb92c4cf Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 9 Sep 2024 10:32:40 +0300 Subject: [PATCH 04/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 67 ++++++++---------------- ydb/core/kqp/runtime/kqp_write_actor.h | 25 +-------- 2 files changed, 24 insertions(+), 68 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index ac4d6b2a5e83..38f5f7fcb0ae 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ 
b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -104,7 +104,6 @@ struct IKqpTableWriterCallbacks { virtual void OnReady(const TTableId& tableId) = 0; - // TODO: also track memory here virtual void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) = 0; virtual void OnCommitted(ui64 shardId, ui64 dataSize) = 0; @@ -653,13 +652,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { Mode = EMode::PREPARE; TxId = txId; - // TODO: move to ShardedWriteController - /*for (const auto shardId : ShardedWriteController->GetShardsIds()) { - const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); - if (!metadata || (metadata->IsLast && metadata->SendAttempts != 0)) { - SendEmptyFinalToShard(shardId); - } - }*/ + // TODO: ShardedWriteController for empty } void SetCommit() { @@ -1138,6 +1131,7 @@ struct TBufferWriteMessage { TActorId From; TWriteToken Token; bool Close = false; + // TODO: move to serialized data std::shared_ptr> Data; std::shared_ptr Alloc; }; @@ -1157,7 +1151,7 @@ struct TEvBufferWriteResult : public TEventLocal, public IKqpWriteBuffer, public IKqpTableWriterCallbacks { +class TKqpBufferWriteActor :public TActorBootstrapped, public IKqpTableWriterCallbacks { using TBase = TActorBootstrapped; public: @@ -1258,6 +1252,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub auto result = std::make_unique(); result->Token = message.Token; + // TODO: send ok only when there are free space Send(message.From, result.release()); { @@ -1317,7 +1312,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub return result; } - THashMap GetLocks() const override { + THashMap GetLocks() const { THashMap result; for (const auto& [_, info] : WriteInfos) { for (const auto& [shardId, lockInfo] : info.WriteTableActor->GetLocks()) { @@ -1329,18 +1324,18 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub return result; } - void Flush(std::function callback) override { + void Flush() { State = EState::FLUSHING; - OnFlushedCallback 
= callback; + //OnFlushedCallback = callback; Close(); Process(); } - void Prepare(std::function callback, TPrepareSettings&& prepareSettings) override { + void Prepare(TPrepareSettings&& prepareSettings) { YQL_ENSURE(State == EState::WRITING); - Y_UNUSED(callback, prepareSettings); + Y_UNUSED(prepareSettings); State = EState::PREPARING; - OnPreparedCallback = std::move(callback); + // OnPreparedCallback = std::move(callback); for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetPrepare(prepareSettings.TxId); } @@ -1348,19 +1343,19 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Process(); } - void OnCommit(std::function callback) override { + void OnCommit() { YQL_ENSURE(State == EState::PREPARING); State = EState::COMMITTING; - OnCommitCallback = std::move(callback); + //OnCommitCallback = std::move(callback); for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetCommit(); } } - void ImmediateCommit(std::function callback, ui64 txId) override { + void ImmediateCommit(ui64 txId) { YQL_ENSURE(State == EState::WRITING); State = EState::COMMITTING; - OnCommitCallback = std::move(callback); + //OnCommitCallback = std::move(callback); for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetImmediateCommit(txId); } @@ -1376,7 +1371,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } - bool IsFinished() const override { + bool IsFinished() const { return State == EState::FINISHED; } @@ -1401,7 +1396,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub return totalMemory; } - THashSet GetShardsIds() const override { + THashSet GetShardsIds() const { THashSet shardIds; for (auto& [_, info] : WriteInfos) { for (const auto& id : info.WriteTableActor->GetShardsIds()) { @@ -1435,10 +1430,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub PassAway(); } - TActorId GetActorId() const override { - return SelfId(); - } - void Process() { if (GetTotalFreeSpace() <= 0) { State = EState::WAITING; @@ -1468,20 
+1459,13 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub CA_LOG_D("Write actor finished"); switch (State) { case EState::PREPARING: - //Settings.Callbacks->OnPrepared(); break; case EState::COMMITTING: - //Settings.Callbacks->OnCommitted(); break; case EState::ROLLINGBACK: - //Settings.Callbacks->OnRolledBack(); break; case EState::FLUSHING: - //Settings.Callbacks->OnFlushed(); - //if (OnFlushedCallback != nullptr) { - YQL_ENSURE(OnFlushedCallback != nullptr); - OnFlushedCallback(); - //} + //OnFlushedCallback(); break; default: YQL_ENSURE(false); @@ -1502,15 +1486,15 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) override { AFL_ENSURE(State == EState::PREPARING); - Y_UNUSED(dataSize); - OnPreparedCallback(std::move(preparedInfo)); + Y_UNUSED(preparedInfo, dataSize); + //OnPreparedCallback(std::move(preparedInfo)); Process(); } void OnCommitted(ui64 shardId, ui64 dataSize) override { AFL_ENSURE(State == EState::COMMITTING); - Y_UNUSED(dataSize); - OnCommitCallback(shardId); + Y_UNUSED(shardId, dataSize); + //OnCommitCallback(shardId); Process(); } @@ -1556,10 +1540,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub THashMap WriteInfos; EState State; - std::function OnFlushedCallback; - std::function OnPreparedCallback; - std::function OnCommitCallback; - THashMap> DataQueues; const i64 MemoryLimit; @@ -1743,9 +1723,8 @@ class TKqpForwardWriteActor : public TActorBootstrapped, TWriteToken WriteToken; }; -std::pair CreateKqpBufferWriterActor(TKqpBufferWriterSettings&& settings) { - auto* actor = new TKqpBufferWriteActor(std::move(settings)); - return std::make_pair(actor, actor); +NActors::IActor* CreateKqpBufferWriterActor(TKqpBufferWriterSettings&& settings) { + return new TKqpBufferWriteActor(std::move(settings)); } diff --git a/ydb/core/kqp/runtime/kqp_write_actor.h b/ydb/core/kqp/runtime/kqp_write_actor.h index a81284d1efdc..ad19c2545536 100644 --- 
a/ydb/core/kqp/runtime/kqp_write_actor.h +++ b/ydb/core/kqp/runtime/kqp_write_actor.h @@ -10,34 +10,11 @@ namespace NKikimr { namespace NKqp { - -// TODO: move somewhere else -class IKqpWriteBuffer { -public: - virtual ~IKqpWriteBuffer() = default; - - // Only when all writes are closed! - virtual void Flush(std::function callback) = 0; - //virtual void Flush(TTableId tableId) = 0; - - virtual void Prepare(std::function callback, TPrepareSettings&& prepareSettings) = 0; - virtual void OnCommit(std::function callback) = 0; - virtual void ImmediateCommit(std::function callback, ui64 txId) = 0; - //virtual void Rollback(std::function callback) = 0; - - virtual THashSet GetShardsIds() const = 0; - virtual THashMap GetLocks() const = 0; - - virtual bool IsFinished() const = 0; - - virtual TActorId GetActorId() const = 0; -}; - struct TKqpBufferWriterSettings { TActorId SessionActorId; }; -std::pair CreateKqpBufferWriterActor(TKqpBufferWriterSettings&& settings); +NActors::IActor* CreateKqpBufferWriterActor(TKqpBufferWriterSettings&& settings); void RegisterKqpWriteActor(NYql::NDq::TDqAsyncIoFactory&, TIntrusivePtr); From 0a6e5167f23a1eca69e6225ac8b564858ac6cf67 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 20 Sep 2024 11:31:44 +0300 Subject: [PATCH 05/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 89 +++++-------------- .../kqp/runtime/kqp_write_actor_settings.h | 1 + 2 files changed, 22 insertions(+), 68 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 38f5f7fcb0ae..9609c1728b32 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -61,8 +61,8 @@ namespace { class TLocksManager { public: - void AddLock(ui64 shardId, const NKikimrDataEvents::TLock& lock) { - Locks[shardId].AddAndCheckLock(lock); + bool AddLock(ui64 shardId, const NKikimrDataEvents::TLock& lock) { + return Locks[shardId].AddAndCheckLock(lock); } const std::optional& 
GetLock(ui64 shardId) { @@ -110,6 +110,8 @@ struct IKqpTableWriterCallbacks { virtual void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) = 0; + //virtual void OnCompleted(ui64 shardId, ui64 dataSize, bool isShardEmpty) = 0; + virtual void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) = 0; }; @@ -168,11 +170,11 @@ class TKqpTableWriteActor : public TActorBootstrapped { try { ShardedWriteController = CreateShardedWriteController( TShardedWriteControllerSettings { - .MemoryLimitTotal = kInFlightMemoryLimitPerActor, - .MemoryLimitPerMessage = kMemoryLimitPerMessage, + .MemoryLimitTotal = MessageSettings.InFlightMemoryLimitPerActorBytes, + .MemoryLimitPerMessage = MessageSettings.MemoryLimitPerMessageBytes, .MaxBatchesPerMessage = (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable ? 1 - : kMaxBatchesPerMessage), + : MessageSettings.MaxBatchesPerMessage), }, TypeEnv, Alloc); @@ -206,19 +208,15 @@ class TKqpTableWriteActor : public TActorBootstrapped { } TVector GetShardsIds() const { - return (!ShardedWriteController) - ? TVector() - : ShardedWriteController->GetShardsIds(); + return ShardedWriteController->GetShardsIds(); } std::optional GetShardsCount() const { - return (InconsistentTx || !ShardedWriteController) + return InconsistentTx ? std::nullopt : std::optional(ShardedWriteController->GetShardsCount()); } - // void Commit(bool immediate) {} - using TWriteToken = IShardedWriteController::TWriteToken; TWriteToken Open( @@ -624,13 +622,13 @@ class TKqpTableWriteActor : public TActorBootstrapped { }()); for (const auto& lock : ev->Get()->Record.GetTxLocks()) { - if (!LocksInfo[ev->Get()->Record.GetOrigin()].AddAndCheckLock(lock)) { + if (!LocksManager.AddLock(ev->Get()->Record.GetOrigin(), lock)) { RuntimeError( TStringBuilder() << "Transaction locks invalidated. 
Table `" << SchemeEntry->TableId.PathId.ToString() << "`.", NYql::NDqProto::StatusIds::ABORTED, NYql::TIssues{}); - LocksManager.AddLock(ev->Get()->Record.GetOrigin(), lock); + } } if (Mode == EMode::COMMIT) { @@ -839,6 +837,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { NActors::TActorId PipeCacheId = NKikimr::MakePipePerNodeCacheID(false); TString LogPrefix; + TWriteActorSettings MessageSettings; const NMiniKQL::TTypeEnvironment& TypeEnv; std::shared_ptr Alloc; @@ -930,7 +929,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu i64 GetFreeSpace() const final { return (WriteTableActor && WriteTableActor->IsReady()) - ? MemoryLimit - GetMemory() + ? MessageSettings.InFlightMemoryLimitPerActorBytes - GetMemory() : std::numeric_limits::min(); // Can't use zero here because compute can use overcommit! } @@ -970,7 +969,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu void Process() { if (GetFreeSpace() <= 0) { WaitingForTableActor = true; - } else if (WaitingForTableActor && GetFreeSpace() > MemoryLimit / 2) { + } else if (WaitingForTableActor && GetFreeSpace() > MessageSettings.InFlightMemoryLimitPerActorBytes / 2) { ResumeExecution(); } @@ -1001,51 +1000,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu TActorBootstrapped::PassAway(); } - void Prepare() { - YQL_ENSURE(SchemeEntry); - ResolveAttempts = 0; - - if (!ShardedWriteController) { - TVector columnsMetadata; - columnsMetadata.reserve(Settings.GetColumns().size()); - for (const auto & column : Settings.GetColumns()) { - columnsMetadata.push_back(column); - } - - try { - ShardedWriteController = CreateShardedWriteController( - TShardedWriteControllerSettings { - .MemoryLimitTotal = MessageSettings.InFlightMemoryLimitPerActorBytes, - .MemoryLimitPerMessage = MessageSettings.MemoryLimitPerMessageBytes, - .MaxBatchesPerMessage = (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable - ? 
1 - : MessageSettings.MaxBatchesPerMessage), - }, - std::move(columnsMetadata), - TypeEnv, - Alloc); - } catch (...) { - RuntimeError( - CurrentExceptionMessage(), - NYql::NDqProto::StatusIds::INTERNAL_ERROR); - } - } - - try { - if (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { - ShardedWriteController->OnPartitioningChanged(*SchemeEntry); - } else { - ShardedWriteController->OnPartitioningChanged(*SchemeEntry, std::move(*SchemeRequest)); - } - ResumeExecution(); - } catch (...) { - RuntimeError( - CurrentExceptionMessage(), - NYql::NDqProto::StatusIds::INTERNAL_ERROR); - } - ProcessBatches(); - } - void ResumeExecution() { CA_LOG_D("Resuming execution."); WaitingForTableActor = false; @@ -1097,7 +1051,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu bool Closed = false; bool WaitingForTableActor = false; - const i64 MemoryLimit = kInFlightMemoryLimitPerActor; }; @@ -1378,12 +1331,12 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub i64 GetFreeSpace(TWriteToken token) const { auto& info = WriteInfos.at(token.TableId); return info.WriteTableActor->IsReady() - ? MemoryLimit - info.WriteTableActor->GetMemory() + ? MessageSettings.InFlightMemoryLimitPerActorBytes - info.WriteTableActor->GetMemory() : std::numeric_limits::min(); // Can't use zero here because compute can use overcommit! 
} i64 GetTotalFreeSpace() const { - return MemoryLimit - GetTotalMemory(); + return MessageSettings.InFlightMemoryLimitPerActorBytes - GetTotalMemory(); } i64 GetTotalMemory() const { @@ -1433,7 +1386,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void Process() { if (GetTotalFreeSpace() <= 0) { State = EState::WAITING; - } else if (State == EState::WAITING && GetTotalFreeSpace() > MemoryLimit / 2) { + } else if (State == EState::WAITING && GetTotalFreeSpace() > MessageSettings.InFlightMemoryLimitPerActorBytes / 2) { ResumeExecution(); } @@ -1519,6 +1472,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub private: TString LogPrefix; + TWriteActorSettings MessageSettings; const TActorId SessionActorId; @@ -1542,8 +1496,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub EState State; THashMap> DataQueues; - const i64 MemoryLimit; - IShardedWriteControllerPtr ShardedWriteController = nullptr; }; @@ -1655,8 +1607,8 @@ class TKqpForwardWriteActor : public TActorBootstrapped, } i64 GetFreeSpace() const final { - return kMaxForwardedSize - DataSize > 0 - ? kMaxForwardedSize - DataSize + return MessageSettings.MaxForwardedSize - DataSize > 0 + ? 
MessageSettings.MaxForwardedSize - DataSize : std::numeric_limits::min(); } @@ -1704,6 +1656,7 @@ class TKqpForwardWriteActor : public TActorBootstrapped, TString LogPrefix; const NKikimrKqp::TKqpTableSinkSettings Settings; + TWriteActorSettings MessageSettings; const ui64 OutputIndex; NYql::NDq::TDqAsyncStats EgressStats; NYql::NDq::IDqComputeActorAsyncOutput::ICallbacks * Callbacks = nullptr; diff --git a/ydb/core/kqp/runtime/kqp_write_actor_settings.h b/ydb/core/kqp/runtime/kqp_write_actor_settings.h index 328dcd5120a7..37e8bfe91055 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor_settings.h +++ b/ydb/core/kqp/runtime/kqp_write_actor_settings.h @@ -11,6 +11,7 @@ struct TWriteActorSettings : TAtomicRefCount { i64 InFlightMemoryLimitPerActorBytes = 64_MB; i64 MemoryLimitPerMessageBytes = 64_MB; i64 MaxBatchesPerMessage = 1000; + i64 MaxForwardedSize = 64_MB; TDuration StartRetryDelay = TDuration::Seconds(1); TDuration MaxRetryDelay = TDuration::Seconds(10); From 25211b1cb3f7485cd91795f0e51d4aa5cc3b3900 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 20 Sep 2024 12:56:25 +0300 Subject: [PATCH 06/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 37 ++++++++++-------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 9609c1728b32..e127df516bff 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -102,16 +102,14 @@ namespace NKqp { struct IKqpTableWriterCallbacks { virtual ~IKqpTableWriterCallbacks() = default; + // Ready to accept writes virtual void OnReady(const TTableId& tableId) = 0; + // EvWrite statuses virtual void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) = 0; - virtual void OnCommitted(ui64 shardId, ui64 dataSize) = 0; - virtual void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) = 0; - //virtual void OnCompleted(ui64 shardId, ui64 dataSize, bool 
isShardEmpty) = 0; - virtual void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) = 0; }; @@ -141,8 +139,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { }; enum class EMode { - UNSPECIFIED, - FLUSH, + WRITE, PREPARE, COMMIT, IMMEDIATE_COMMIT, @@ -631,41 +628,36 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } - if (Mode == EMode::COMMIT) { - const auto result = ShardedWriteController->OnMessageAcknowledged( - ev->Get()->Record.GetOrigin(), ev->Cookie); - if (result) { - Callbacks->OnCommitted(ev->Get()->Record.GetOrigin(), result->DataSize); - } - } else { - const auto result = ShardedWriteController->OnMessageAcknowledged( + const auto result = ShardedWriteController->OnMessageAcknowledged( ev->Get()->Record.GetOrigin(), ev->Cookie); - if (result) { - Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), result->DataSize, result->IsShardEmpty); - } + if (result && (Mode == EMode::COMMIT || Mode == EMode::IMMEDIATE_COMMIT)) { + Callbacks->OnCommitted(ev->Get()->Record.GetOrigin(), result->DataSize); + } else if (result) { + Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), result->DataSize, result->IsShardEmpty); } } void SetPrepare(ui64 txId) { + YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::PREPARE; TxId = txId; - // TODO: ShardedWriteController for empty + // TODO: other shards from prepareInfo } void SetCommit() { + YQL_ENSURE(Mode == EMode::PREPARE); Mode = EMode::COMMIT; - // TODO: ShardedWriteController for empty } void SetImmediateCommit(ui64 txId) { + YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::IMMEDIATE_COMMIT; TxId = txId; // TODO: ShardedWriteController for empty } void Flush() { - Mode = EMode::FLUSH; for (const size_t shardId : ShardedWriteController->GetPendingShards()) { SendDataToShard(shardId); } @@ -837,7 +829,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { NActors::TActorId PipeCacheId = 
NKikimr::MakePipePerNodeCacheID(false); TString LogPrefix; - TWriteActorSettings MessageSettings; + TWriteActorSettings MessageSettings; // TODO: fill it const NMiniKQL::TTypeEnvironment& TypeEnv; std::shared_ptr Alloc; @@ -857,7 +849,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { TLocksManager LocksManager; bool Closed = false; - EMode Mode = EMode::UNSPECIFIED; + EMode Mode = EMode::WRITE; IShardedWriteControllerPtr ShardedWriteController = nullptr; }; @@ -958,6 +950,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu EgressStats.Resume(); Y_UNUSED(size); + YQL_ENSURE(WriteTableActor); WriteTableActor->Write(*WriteToken, data); if (Closed) { WriteTableActor->Close(*WriteToken); From 8ef8933e1875ee3baf1e1a6b06dbf73fd94660ea Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 20 Sep 2024 13:20:36 +0300 Subject: [PATCH 07/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 6 ++- ydb/core/kqp/runtime/kqp_write_table.cpp | 58 ++++++++++++++++-------- ydb/core/kqp/runtime/kqp_write_table.h | 5 +- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index e127df516bff..46ea761e2828 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -641,7 +641,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::PREPARE; TxId = txId; - // TODO: ShardedWriteController for empty + ShardedWriteController->AddCoveringMessages(); // TODO: other shards from prepareInfo } @@ -654,7 +654,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::IMMEDIATE_COMMIT; TxId = txId; - // TODO: ShardedWriteController for empty + ShardedWriteController->AddCoveringMessages(); } void Flush() { @@ -664,6 +664,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void SendDataToShard(const ui64 shardId) { + 
YQL_ENSURE(Mode != EMode::COMMIT); + const auto metadata = ShardedWriteController->GetMessageMetadata(shardId); YQL_ENSURE(metadata); if (metadata->SendAttempts >= MessageSettings.MaxWriteAttempts) { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 6c8cc5a7c0f3..d41d86440970 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -865,8 +865,16 @@ struct TMetadata { }; struct TBatchWithMetadata { - IShardedWriteController::TWriteToken Token; - IPayloadSerializer::IBatchPtr Data; + IShardedWriteController::TWriteToken Token = std::numeric_limits::max(); + IPayloadSerializer::IBatchPtr Data = nullptr; + + bool IsCoveringBatch() const { + return Data != nullptr; + } + + i64 GetMemory() const { + return IsCoveringBatch() ? 0 : Data->GetMemory(); + } }; class TShardsInfo { @@ -902,11 +910,11 @@ class TShardsInfo { i64 dataSize = 0; while (BatchesInFlight < maxCount && BatchesInFlight < Batches.size() - && dataSize + GetBatch(BatchesInFlight).Data->GetMemory() <= maxDataSize) { - dataSize += GetBatch(BatchesInFlight).Data->GetMemory(); + && dataSize + GetBatch(BatchesInFlight).GetMemory() <= maxDataSize) { + dataSize += GetBatch(BatchesInFlight).GetMemory(); ++BatchesInFlight; } - YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).Data->GetMemory() <= maxDataSize); + YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).GetMemory() <= maxDataSize); } const TBatchWithMetadata& GetBatch(size_t index) const { @@ -917,7 +925,7 @@ class TShardsInfo { if (BatchesInFlight != 0 && Cookie == cookie) { ui64 dataSize = 0; for (size_t index = 0; index < BatchesInFlight; ++index) { - dataSize += Batches.front().Data->GetMemory(); + dataSize += Batches.front().GetMemory(); Batches.pop_front(); } @@ -934,7 +942,7 @@ class TShardsInfo { void PushBatch(TBatchWithMetadata&& batch) { YQL_ENSURE(!IsClosed()); Batches.emplace_back(std::move(batch)); 
- Memory += Batches.back().Data->GetMemory(); + Memory += Batches.back().GetMemory(); } ui64 GetCookie() const { @@ -1155,6 +1163,12 @@ class TShardedWriteController : public IShardedWriteController { ShardsInfo.Close(); } + void AddCoveringMessages() override { + for (auto& [_, shardInfo] : ShardsInfo.GetShards()) { + shardInfo.PushBatch(TBatchWithMetadata{}); + } + } + TVector GetPendingShards() const override { return ShardsInfo.GetPendingShards(); } @@ -1178,8 +1192,7 @@ class TShardedWriteController : public IShardedWriteController { TMessageMetadata meta; meta.Cookie = shardInfo.GetCookie(); meta.OperationsCount = shardInfo.GetBatchesInFlight(); - meta.IsLast = shardInfo.Size() == shardInfo.GetBatchesInFlight(); - meta.IsFinal = shardInfo.IsClosed() && meta.IsLast; + meta.IsFinal = shardInfo.IsClosed() && shardInfo.Size() == shardInfo.GetBatchesInFlight(); meta.SendAttempts = shardInfo.GetSendAttempts(); return meta; @@ -1195,16 +1208,20 @@ class TShardedWriteController : public IShardedWriteController { for (size_t index = 0; index < shardInfo.GetBatchesInFlight(); ++index) { const auto& inFlightBatch = shardInfo.GetBatch(index); - YQL_ENSURE(!inFlightBatch.Data->IsEmpty()); - result.TotalDataSize += inFlightBatch.Data->GetMemory(); - const ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(evWrite) - .AddDataToPayload(inFlightBatch.Data->SerializeToString()); - evWrite.AddOperation( - WriteInfos.at(inFlightBatch.Token).Metadata.OperationType, - WriteInfos.at(inFlightBatch.Token).Metadata.TableId, - WriteInfos.at(inFlightBatch.Token).Serializer->GetWriteColumnIds(), - payloadIndex, - WriteInfos.at(inFlightBatch.Token).Serializer->GetDataFormat()); + if (inFlightBatch.Data) { + YQL_ENSURE(!inFlightBatch.Data->IsEmpty()); + result.TotalDataSize += inFlightBatch.Data->GetMemory(); + const ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(evWrite) + .AddDataToPayload(inFlightBatch.Data->SerializeToString()); + evWrite.AddOperation( + 
WriteInfos.at(inFlightBatch.Token).Metadata.OperationType, + WriteInfos.at(inFlightBatch.Token).Metadata.TableId, + WriteInfos.at(inFlightBatch.Token).Serializer->GetWriteColumnIds(), + payloadIndex, + WriteInfos.at(inFlightBatch.Token).Serializer->GetDataFormat()); + } else { + YQL_ENSURE(index + 1 == shardInfo.GetBatchesInFlight()); + } } return result; @@ -1346,6 +1363,9 @@ class TShardedWriteController : public IShardedWriteController { for (size_t index = 0; index < shardInfo.Size(); ++index) { const auto& batch = shardInfo.GetBatch(index); const auto& writeInfo = WriteInfos.at(batch.Token); + // Resharding supported only for inconsistent write, + // so convering empty batches don't exist in this case. + YQL_ENSURE(batch.Data); writeInfo.Serializer->AddBatch(batch.Data); } } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index aadadaac51a0..04e01c3bf6c2 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -30,8 +30,11 @@ class IShardedWriteController : public TThrRefBase { TVector&& inputColumns) = 0; virtual void Write(TWriteToken token, const NMiniKQL::TUnboxedValueBatch& data) = 0; virtual void Close(TWriteToken token) = 0; + virtual void Close() = 0; + virtual void AddCoveringMessages() = 0; + virtual TVector GetPendingShards() const = 0; virtual ui64 GetShardsCount() const = 0; virtual TVector GetShardsIds() const = 0; @@ -39,7 +42,7 @@ class IShardedWriteController : public TThrRefBase { struct TMessageMetadata { ui64 Cookie = 0; ui64 OperationsCount = 0; - bool IsLast = false; + //bool IsLast = false; bool IsFinal = false; ui64 SendAttempts = 0; }; From 0b1c1fd8c638a39abba0947117969d2525c6b770 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 20 Sep 2024 13:47:51 +0300 Subject: [PATCH 08/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 47 +++++++++++++++++------- ydb/core/kqp/runtime/kqp_write_table.h | 1 - 2 files changed, 34 insertions(+), 14 
deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 46ea761e2828..af83bf38a8ac 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -99,6 +99,16 @@ namespace { namespace NKikimr { namespace NKqp { +struct TCommitInfo { + struct TShardInfo { + TVector SendingShards; + TVector ReceivingShards; + }; + + ui64 TxId; + THashMap ShardIdToInfo; +}; + struct IKqpTableWriterCallbacks { virtual ~IKqpTableWriterCallbacks() = default; @@ -637,12 +647,12 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } - void SetPrepare(ui64 txId) { + void SetPrepare(TCommitInfo&& commitInfo) { YQL_ENSURE(Mode == EMode::WRITE); + YQL_ENSURE(!CommitInfo); Mode = EMode::PREPARE; - TxId = txId; + CommitInfo = std::move(commitInfo); ShardedWriteController->AddCoveringMessages(); - // TODO: other shards from prepareInfo } void SetCommit() { @@ -650,10 +660,11 @@ class TKqpTableWriteActor : public TActorBootstrapped { Mode = EMode::COMMIT; } - void SetImmediateCommit(ui64 txId) { + void SetImmediateCommit(TCommitInfo&& commitInfo) { YQL_ENSURE(Mode == EMode::WRITE); + YQL_ENSURE(!CommitInfo); Mode = EMode::IMMEDIATE_COMMIT; - TxId = txId; + CommitInfo = std::move(commitInfo); ShardedWriteController->AddCoveringMessages(); } @@ -692,9 +703,10 @@ class TKqpTableWriteActor : public TActorBootstrapped { : NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); if (isImmediateCommit) { + YQL_ENSURE(CommitInfo); const auto lock = LocksManager.GetLock(shardId); if (lock) { - evWrite->Record.SetTxId(TxId); + evWrite->Record.SetTxId(CommitInfo->TxId); auto* locks = evWrite->Record.MutableLocks(); locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); locks->AddSendingShards(shardId); @@ -704,12 +716,17 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } } else if (isPrepare) { - evWrite->Record.SetTxId(TxId); + YQL_ENSURE(CommitInfo); + evWrite->Record.SetTxId(CommitInfo->TxId); auto* 
locks = evWrite->Record.MutableLocks(); locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - // TODO: other shards from prepareInfo - locks->AddSendingShards(shardId); - locks->AddReceivingShards(shardId); + + for (const ui64 sendingShardId : CommitInfo->ShardIdToInfo.at(shardId).SendingShards) { + locks->AddSendingShards(sendingShardId); + } + for (const ui64 receivingShardId : CommitInfo->ShardIdToInfo.at(shardId).ReceivingShards) { + locks->AddReceivingShards(receivingShardId); + } // TODO: multi locks (for tablestore support) const auto lock = LocksManager.GetLock(shardId); @@ -835,7 +852,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { const NMiniKQL::TTypeEnvironment& TypeEnv; std::shared_ptr Alloc; - ui64 TxId = 0; const TTableId TableId; const TString TablePath; @@ -852,6 +868,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { TLocksManager LocksManager; bool Closed = false; EMode Mode = EMode::WRITE; + std::optional CommitInfo; IShardedWriteControllerPtr ShardedWriteController = nullptr; }; @@ -1285,7 +1302,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub State = EState::PREPARING; // OnPreparedCallback = std::move(callback); for (auto& [_, info] : WriteInfos) { - info.WriteTableActor->SetPrepare(prepareSettings.TxId); + TCommitInfo commitInfo; + commitInfo.TxId = prepareSettings.TxId; + info.WriteTableActor->SetPrepare(std::move(commitInfo)); } Close(); Process(); @@ -1305,7 +1324,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub State = EState::COMMITTING; //OnCommitCallback = std::move(callback); for (auto& [_, info] : WriteInfos) { - info.WriteTableActor->SetImmediateCommit(txId); + TCommitInfo commitInfo; + commitInfo.TxId = txId; + info.WriteTableActor->SetImmediateCommit(std::move(commitInfo)); } Close(); Process(); diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 04e01c3bf6c2..4646f01bec9f 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ 
b/ydb/core/kqp/runtime/kqp_write_table.h @@ -42,7 +42,6 @@ class IShardedWriteController : public TThrRefBase { struct TMessageMetadata { ui64 Cookie = 0; ui64 OperationsCount = 0; - //bool IsLast = false; bool IsFinal = false; ui64 SendAttempts = 0; }; From e82a5e9f2c4339301b0c63e7c5b8d0016adb0fe6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 20 Sep 2024 16:43:59 +0300 Subject: [PATCH 09/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 259 +++++++++++------------ 1 file changed, 129 insertions(+), 130 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index af83bf38a8ac..3da741c83a23 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -113,7 +113,7 @@ struct IKqpTableWriterCallbacks { virtual ~IKqpTableWriterCallbacks() = default; // Ready to accept writes - virtual void OnReady(const TTableId& tableId) = 0; + virtual void OnReady() = 0; // EvWrite statuses virtual void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) = 0; @@ -829,7 +829,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { NYql::NDqProto::StatusIds::INTERNAL_ERROR); } - Callbacks->OnReady(TableId); + Callbacks->OnReady(); } void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { @@ -1018,7 +1018,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu Callbacks->ResumeExecution(); } - void OnReady(const TTableId&) override { + void OnReady() override { Process(); } @@ -1138,17 +1138,16 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub , TypeEnv(*Alloc) { Alloc->Release(); - State = EState::WRITING; } void Bootstrap() { LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; - Become(&TKqpBufferWriteActor::StateFuncBuf); + Become(&TKqpBufferWriteActor::StateFunc); } static constexpr char ActorName[] = "KQP_BUFFER_WRITE_ACTOR"; - 
STFUNC(StateFuncBuf) { + STFUNC(StateFunc) { try { switch (ev->GetTypeRewrite()) { hFunc(TEvKqpBuffer::TEvTerminate, Handle); @@ -1165,13 +1164,39 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub TWriteToken token; if (!ev->Get()->Token) { AFL_ENSURE(ev->Get()->Settings); - token = Open(std::move(*ev->Get()->Settings)); + auto& settings = *ev->Get()->Settings; + if (!WriteInfos.empty()) { + AFL_ENSURE(LockTxId == settings.TransactionSettings.LockTxId); + AFL_ENSURE(LockNodeId == settings.TransactionSettings.LockNodeId); + AFL_ENSURE(InconsistentTx == settings.TransactionSettings.InconsistentTx); + } else { + LockTxId = settings.TransactionSettings.LockTxId; + LockNodeId = settings.TransactionSettings.LockNodeId; + InconsistentTx = settings.TransactionSettings.InconsistentTx; + } + + auto& writeInfo = WriteInfos[settings.TableId]; + if (!writeInfo.WriteTableActor) { + writeInfo.WriteTableActor = new TKqpTableWriteActor( + this, + settings.TableId, + settings.TablePath, + LockTxId, + LockNodeId, + InconsistentTx, + TypeEnv, + Alloc); + writeInfo.WriteTableActorId = RegisterWithSameMailbox(writeInfo.WriteTableActor); + } + + auto cookie = writeInfo.WriteTableActor->Open(settings.OperationType, std::move(settings.Columns)); + token = TWriteToken{settings.TableId, cookie}; } else { token = *ev->Get()->Token; } auto& queue = DataQueues[token.TableId]; - queue.emplace_back(); + queue.emplace(); auto& message = queue.back(); message.Token = token; @@ -1179,91 +1204,105 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub message.Close = ev->Get()->Close; message.Data = ev->Get()->Data; message.Alloc = ev->Get()->Alloc; - - if (HasWrites) { - AFL_ENSURE(LockTxId == ev->Get()->Settings->TransactionSettings.LockTxId); - AFL_ENSURE(LockNodeId == ev->Get()->Settings->TransactionSettings.LockNodeId); - AFL_ENSURE(InconsistentTx == ev->Get()->Settings->TransactionSettings.InconsistentTx); - } else { - LockTxId = 
ev->Get()->Settings->TransactionSettings.LockTxId; - LockNodeId = ev->Get()->Settings->TransactionSettings.LockNodeId; - InconsistentTx = ev->Get()->Settings->TransactionSettings.InconsistentTx; - HasWrites = true; - } - ProcessQueue(token.TableId); + Process(); } - void ProcessQueue(const TTableId& tableId) { - auto& queue = DataQueues.at(tableId); - auto& writeInfo = WriteInfos.at(tableId); - - if (!writeInfo.WriteTableActor->IsReady()) { - return; - } + void Process() { + ProcessRequestQueue(); + ProcessWrite(); + ProcessAckQueue(); + } - while (!queue.empty()) { - auto& message = queue.front(); + void ProcessRequestQueue() { + for (auto& [tableId, queue] : DataQueues) { + auto& writeInfo = WriteInfos.at(tableId); - if (!message.Data->empty()) { - for (const auto& data : *message.Data) { - Write(message.Token, data); - } - } - if (message.Close) { - Close(message.Token); + if (!writeInfo.WriteTableActor->IsReady()) { + return; } - auto result = std::make_unique(); - result->Token = message.Token; + while (!queue.empty()) { + auto& message = queue.front(); + + if (!message.Data->empty()) { + for (const auto& data : *message.Data) { + writeInfo.WriteTableActor->Write(message.Token.Cookie, data); + } + } + if (message.Close) { + writeInfo.WriteTableActor->Close(message.Token.Cookie); + } - // TODO: send ok only when there are free space - Send(message.From, result.release()); + AckQueue.push(TAckMessage{ + .ForwardActorId = message.From, + .Token = message.Token, + .DataSize = 0, + }); - { - TGuard guard(*message.Alloc); - message.Data = nullptr; + { + TGuard guard(*message.Alloc); + message.Data = nullptr; + } + queue.pop(); } - queue.pop_front(); } - - Process(); } - TWriteToken Open(TWriteSettings&& settings) { - YQL_ENSURE(State == EState::WRITING || State == EState::WAITING); + void ProcessAckQueue() { + while (!AckQueue.empty()) { + const auto& item = AckQueue.front(); + if (GetTotalFreeSpace() >= item.DataSize) { + AckQueue.pop(); + } else { + return; + } + 
} + } - auto& info = WriteInfos[settings.TableId]; - if (!info.WriteTableActor) { - info.WriteTableActor = new TKqpTableWriteActor( - this, - settings.TableId, - settings.TablePath, - LockTxId, - LockNodeId, - InconsistentTx, - TypeEnv, - Alloc); - info.WriteTableActorId = RegisterWithSameMailbox(info.WriteTableActor); + void ProcessWrite() { + if (GetTotalFreeSpace() <= 0) { State = EState::WAITING; + } else if (State == EState::WAITING && GetTotalFreeSpace() > MessageSettings.InFlightMemoryLimitPerActorBytes / 2) { + ResumeExecution(); } - auto writeToken = info.WriteTableActor->Open(settings.OperationType, std::move(settings.Columns)); - return {settings.TableId, std::move(writeToken)}; - } - - void Write(TWriteToken token, const NMiniKQL::TUnboxedValueBatch& data) { - YQL_ENSURE(State == EState::WRITING || State == EState::WAITING); + const bool needToFlush = (State == EState::WAITING + || State == EState::FLUSHING + || State == EState::PREPARING + || State == EState::COMMITTING + || State == EState::ROLLINGBACK); - auto& info = WriteInfos.at(token.TableId); - info.WriteTableActor->Write(token.Cookie, data); - } + if (needToFlush) { + for (auto& [_, info] : WriteInfos) { + if (info.WriteTableActor->IsReady()) { + info.WriteTableActor->Flush(); + } + } + } - void Close(TWriteToken token) { - YQL_ENSURE(State == EState::WRITING || State == EState::WAITING); + bool isFinished = true; + for (auto& [_, info] : WriteInfos) { + isFinished &= info.WriteTableActor->IsFinished(); + } + if (isFinished) { + CA_LOG_D("Write actor finished"); + switch (State) { + case EState::PREPARING: + break; + case EState::COMMITTING: + break; + case EState::ROLLINGBACK: + break; + case EState::FLUSHING: + //OnFlushedCallback(); + break; + default: + YQL_ENSURE(false); + } - auto& info = WriteInfos.at(token.TableId); - info.WriteTableActor->Close(token.Cookie); + State = EState::FINISHED; + } } THashMap GetLocks(TWriteToken token) const { @@ -1383,7 +1422,7 @@ class TKqpBufferWriteActor 
:public TActorBootstrapped, pub TGuard guard(*message.Alloc); message.Data = nullptr; } - queue.pop_front(); + queue.pop(); } } @@ -1399,58 +1438,13 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub PassAway(); } - void Process() { - if (GetTotalFreeSpace() <= 0) { - State = EState::WAITING; - } else if (State == EState::WAITING && GetTotalFreeSpace() > MessageSettings.InFlightMemoryLimitPerActorBytes / 2) { - ResumeExecution(); - } - - const bool needToFlush = (State == EState::WAITING - || State == EState::FLUSHING - || State == EState::PREPARING - || State == EState::COMMITTING - || State == EState::ROLLINGBACK); - - if (needToFlush) { - for (auto& [_, info] : WriteInfos) { - if (info.WriteTableActor->IsReady()) { - info.WriteTableActor->Flush(); - } - } - } - - bool isFinished = true; - for (auto& [_, info] : WriteInfos) { - isFinished &= info.WriteTableActor->IsFinished(); - } - if (isFinished) { - CA_LOG_D("Write actor finished"); - switch (State) { - case EState::PREPARING: - break; - case EState::COMMITTING: - break; - case EState::ROLLINGBACK: - break; - case EState::FLUSHING: - //OnFlushedCallback(); - break; - default: - YQL_ENSURE(false); - } - - State = EState::FINISHED; - } - } - void ResumeExecution() { CA_LOG_D("Resuming execution."); State = EState::WRITING; } - void OnReady(const TTableId& tableId) override { - ProcessQueue(tableId); + void OnReady() override { + Process(); } void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) override { @@ -1477,7 +1471,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void ReplyErrorAndDie(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) { - CA_LOG_E("Error: " << message << ". statusCode=" << NYql::NDqProto::StatusIds_StatusCode_Name(statusCode) << ". subIssues=" << subIssues.ToString()); + CA_LOG_E(message << ". statusCode=" << NYql::NDqProto::StatusIds_StatusCode_Name(statusCode) << ". 
subIssues=" << subIssues.ToString()); Send(SessionActorId, new TEvKqpBuffer::TEvError{ message, statusCode, @@ -1492,7 +1486,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub const TActorId SessionActorId; - bool HasWrites = false; ui64 LockTxId = 0; ui64 LockNodeId = 0; bool InconsistentTx = false; @@ -1503,14 +1496,19 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub struct TWriteInfo { TKqpTableWriteActor* WriteTableActor = nullptr; TActorId WriteTableActorId; - - THashMap> ResumeExecutionCallbacks; }; THashMap WriteInfos; EState State; - THashMap> DataQueues; + THashMap> DataQueues; + + struct TAckMessage { + TActorId ForwardActorId; + TWriteToken Token; + i64 DataSize; + }; + std::queue AckQueue; IShardedWriteControllerPtr ShardedWriteController = nullptr; }; @@ -1561,6 +1559,11 @@ class TKqpForwardWriteActor : public TActorBootstrapped, } void Handle(TEvBufferWriteResult::TPtr& result) { + EgressStats.Bytes += DataSize; + EgressStats.Chunks++; + EgressStats.Splits++; + EgressStats.Resume(); + WriteToken = result->Get()->Token; DataSize = 0; { @@ -1605,10 +1608,6 @@ class TKqpForwardWriteActor : public TActorBootstrapped, } AFL_ENSURE(Send(BufferActorId, ev.release())); - EgressStats.Bytes += DataSize; - EgressStats.Chunks++; - EgressStats.Splits++; - EgressStats.Resume(); } void CommitState(const NYql::NDqProto::TCheckpoint&) final {}; From 139236803aae5ffcba222d3e745003d8cc264229 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 20 Sep 2024 19:09:44 +0300 Subject: [PATCH 10/69] fix --- ydb/core/kqp/common/buffer/events.h | 1 + ydb/core/kqp/runtime/kqp_write_actor.cpp | 174 +++++++++++------------ ydb/core/kqp/runtime/kqp_write_table.cpp | 10 ++ ydb/core/kqp/runtime/kqp_write_table.h | 1 + 4 files changed, 94 insertions(+), 92 deletions(-) diff --git a/ydb/core/kqp/common/buffer/events.h b/ydb/core/kqp/common/buffer/events.h index 48a845dc6e1e..8dca571446d0 100644 --- a/ydb/core/kqp/common/buffer/events.h +++ 
b/ydb/core/kqp/common/buffer/events.h @@ -13,6 +13,7 @@ struct TPrepareSettings { THashSet SendingShards; THashSet ReceivingShards; std::optional ArbiterShard; + std::optional ArbiterColumnShard; }; struct TPreparedInfo { diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 3da741c83a23..b16594f6de5b 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -99,16 +99,6 @@ namespace { namespace NKikimr { namespace NKqp { -struct TCommitInfo { - struct TShardInfo { - TVector SendingShards; - TVector ReceivingShards; - }; - - ui64 TxId; - THashMap ShardIdToInfo; -}; - struct IKqpTableWriterCallbacks { virtual ~IKqpTableWriterCallbacks() = default; @@ -210,6 +200,10 @@ class TKqpTableWriteActor : public TActorBootstrapped { return ShardedWriteController->IsReady(); } + bool IsEmpty() const { + return ShardedWriteController->IsEmpty(); + } + const THashMap& GetLocks() const { return LocksManager.GetLocks(); } @@ -647,24 +641,23 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } - void SetPrepare(TCommitInfo&& commitInfo) { + void SetPrepare(const std::shared_ptr& prepareSettings) { YQL_ENSURE(Mode == EMode::WRITE); - YQL_ENSURE(!CommitInfo); Mode = EMode::PREPARE; - CommitInfo = std::move(commitInfo); + PrepareSettings = prepareSettings; ShardedWriteController->AddCoveringMessages(); } - void SetCommit() { - YQL_ENSURE(Mode == EMode::PREPARE); - Mode = EMode::COMMIT; - } + //void SetCommit() { + // //TODO: do we need it? 
+ // YQL_ENSURE(Mode == EMode::PREPARE); + // Mode = EMode::COMMIT; + //} - void SetImmediateCommit(TCommitInfo&& commitInfo) { + void SetImmediateCommit(ui64 txId) { YQL_ENSURE(Mode == EMode::WRITE); - YQL_ENSURE(!CommitInfo); Mode = EMode::IMMEDIATE_COMMIT; - CommitInfo = std::move(commitInfo); + PrepareSettings->TxId = txId; ShardedWriteController->AddCoveringMessages(); } @@ -703,10 +696,9 @@ class TKqpTableWriteActor : public TActorBootstrapped { : NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); if (isImmediateCommit) { - YQL_ENSURE(CommitInfo); const auto lock = LocksManager.GetLock(shardId); if (lock) { - evWrite->Record.SetTxId(CommitInfo->TxId); + evWrite->Record.SetTxId(PrepareSettings->TxId); auto* locks = evWrite->Record.MutableLocks(); locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); locks->AddSendingShards(shardId); @@ -716,16 +708,38 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } } else if (isPrepare) { - YQL_ENSURE(CommitInfo); - evWrite->Record.SetTxId(CommitInfo->TxId); + evWrite->Record.SetTxId(PrepareSettings->TxId); auto* locks = evWrite->Record.MutableLocks(); locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - for (const ui64 sendingShardId : CommitInfo->ShardIdToInfo.at(shardId).SendingShards) { - locks->AddSendingShards(sendingShardId); - } - for (const ui64 receivingShardId : CommitInfo->ShardIdToInfo.at(shardId).ReceivingShards) { - locks->AddReceivingShards(receivingShardId); + if (!PrepareSettings->ArbiterColumnShard) { + for (const ui64 sendingShardId : PrepareSettings->SendingShards) { + locks->AddSendingShards(sendingShardId); + } + for (const ui64 receivingShardId : PrepareSettings->ReceivingShards) { + locks->AddReceivingShards(receivingShardId); + } + if (PrepareSettings->ArbiterShard) { + locks->SetArbiterShard(*PrepareSettings->ArbiterShard); + } + } else if (PrepareSettings->ArbiterColumnShard == shardId) { + locks->SetArbiterColumnShard(*PrepareSettings->ArbiterColumnShard); + for (const ui64 sendingShardId 
: PrepareSettings->SendingShards) { + locks->AddSendingShards(sendingShardId); + } + for (const ui64 receivingShardId : PrepareSettings->ReceivingShards) { + locks->AddReceivingShards(receivingShardId); + } + } else { + locks->SetArbiterColumnShard(*PrepareSettings->ArbiterColumnShard); + locks->AddSendingShards(*PrepareSettings->ArbiterColumnShard); + locks->AddReceivingShards(*PrepareSettings->ArbiterColumnShard); + if (PrepareSettings->SendingShards.contains(shardId)) { + locks->AddSendingShards(shardId); + } + if (PrepareSettings->ReceivingShards.contains(shardId)) { + locks->AddReceivingShards(shardId); + } } // TODO: multi locks (for tablestore support) @@ -868,7 +882,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { TLocksManager LocksManager; bool Closed = false; EMode Mode = EMode::WRITE; - std::optional CommitInfo; + + std::shared_ptr PrepareSettings; IShardedWriteControllerPtr ShardedWriteController = nullptr; }; @@ -1121,13 +1136,11 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub public: enum class EState { - WAITING, // Out of memory, wait for free memory. Can't accept any writes in this state. - WRITING, // Allow to write data to buffer (there is free memory). + WRITING, // Allow to write data to buffer. FLUSHING, // Force flush (for uncommitted changes visibility). Can't accept any writes in this state. PREPARING, // Do preparation for commit. All writers are closed. New writes wouldn't be accepted. COMMITTING, // Do immediate commit (single shard). All writers are closed. New writes wouldn't be accepted. ROLLINGBACK, // Do rollback. New writes wouldn't be accepted. 
- FINISHED, }; public: @@ -1261,17 +1274,11 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void ProcessWrite() { - if (GetTotalFreeSpace() <= 0) { - State = EState::WAITING; - } else if (State == EState::WAITING && GetTotalFreeSpace() > MessageSettings.InFlightMemoryLimitPerActorBytes / 2) { - ResumeExecution(); - } - - const bool needToFlush = (State == EState::WAITING + const bool needToFlush = GetTotalFreeSpace() <= 0 || State == EState::FLUSHING || State == EState::PREPARING || State == EState::COMMITTING - || State == EState::ROLLINGBACK); + || State == EState::ROLLINGBACK; if (needToFlush) { for (auto& [_, info] : WriteInfos) { @@ -1281,27 +1288,23 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } - bool isFinished = true; - for (auto& [_, info] : WriteInfos) { - isFinished &= info.WriteTableActor->IsFinished(); + if (State == EState::PREPARING) { + bool isFinished = true; + for (auto& [_, info] : WriteInfos) { + isFinished &= info.WriteTableActor->IsFinished(); + } + if (isFinished) { + OnFinished(); + } } - if (isFinished) { - CA_LOG_D("Write actor finished"); - switch (State) { - case EState::PREPARING: - break; - case EState::COMMITTING: - break; - case EState::ROLLINGBACK: - break; - case EState::FLUSHING: - //OnFlushedCallback(); - break; - default: - YQL_ENSURE(false); + if (State == EState::FLUSHING) { + bool isEmpty = true; + for (auto& [_, info] : WriteInfos) { + isEmpty &= info.WriteTableActor->IsEmpty(); + } + if (isEmpty) { + OnFlushed(); } - - State = EState::FINISHED; } } @@ -1329,43 +1332,31 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Flush() { + YQL_ENSURE(State == EState::WRITING); State = EState::FLUSHING; - //OnFlushedCallback = callback; - Close(); Process(); } - void Prepare(TPrepareSettings&& prepareSettings) { + void Prepare(const std::shared_ptr& prepareSettings) { YQL_ENSURE(State == EState::WRITING); - Y_UNUSED(prepareSettings); State = EState::PREPARING; - // 
OnPreparedCallback = std::move(callback); for (auto& [_, info] : WriteInfos) { - TCommitInfo commitInfo; - commitInfo.TxId = prepareSettings.TxId; - info.WriteTableActor->SetPrepare(std::move(commitInfo)); + info.WriteTableActor->SetPrepare(prepareSettings); } Close(); Process(); } - void OnCommit() { - YQL_ENSURE(State == EState::PREPARING); - State = EState::COMMITTING; - //OnCommitCallback = std::move(callback); - for (auto& [_, info] : WriteInfos) { - info.WriteTableActor->SetCommit(); - } - } + //void OnCommit() { + // YQL_ENSURE(State == EState::PREPARING); + // // TODO: need it? + //} void ImmediateCommit(ui64 txId) { YQL_ENSURE(State == EState::WRITING); State = EState::COMMITTING; - //OnCommitCallback = std::move(callback); for (auto& [_, info] : WriteInfos) { - TCommitInfo commitInfo; - commitInfo.TxId = txId; - info.WriteTableActor->SetImmediateCommit(std::move(commitInfo)); + info.WriteTableActor->SetImmediateCommit(txId); } Close(); Process(); @@ -1379,10 +1370,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } - bool IsFinished() const { - return State == EState::FINISHED; - } - i64 GetFreeSpace(TWriteToken token) const { auto& info = WriteInfos.at(token.TableId); return info.WriteTableActor->IsReady() @@ -1438,11 +1425,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub PassAway(); } - void ResumeExecution() { - CA_LOG_D("Resuming execution."); - State = EState::WRITING; - } - void OnReady() override { Process(); } @@ -1450,15 +1432,14 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) override { AFL_ENSURE(State == EState::PREPARING); Y_UNUSED(preparedInfo, dataSize); - //OnPreparedCallback(std::move(preparedInfo)); + // TODO: collect info for commit Process(); } void OnCommitted(ui64 shardId, ui64 dataSize) override { AFL_ENSURE(State == EState::COMMITTING); Y_UNUSED(shardId, dataSize); - //OnCommitCallback(shardId); - Process(); + // TODO: send 
result } void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) override { @@ -1466,6 +1447,15 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Process(); } + void OnFinished() { + // TODO: send collected data + } + + void OnFlushed() { + State = EState::WRITING; + // TODO: send ok + } + void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) override { ReplyErrorAndDie(message, statusCode, subIssues); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index d41d86440970..aed4705f92b2 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -1298,6 +1298,16 @@ class TShardedWriteController : public IShardedWriteController { return true; } + bool IsEmpty() const override { + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + const auto& writeInfo = WriteInfos.at(token); + if (writeInfo.Serializer && writeInfo.Serializer->IsEmpty()) { + return false; + } + } + return ShardsInfo.IsEmpty(); + } + ui64 GetShardsCount() const override { return ShardsInfo.GetShards().size(); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 4646f01bec9f..9b434163ca26 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -70,6 +70,7 @@ class IShardedWriteController : public TThrRefBase { virtual bool IsAllWritesFinished() const = 0; virtual bool IsReady() const = 0; + virtual bool IsEmpty() const = 0; }; using IShardedWriteControllerPtr = TIntrusivePtr; From e6b486ac2a2b8892a495abbe2d56e01b07f52c9f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 21 Sep 2024 15:30:17 +0300 Subject: [PATCH 11/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp 
b/ydb/core/kqp/runtime/kqp_write_actor.cpp index b16594f6de5b..23ca59e3fb8c 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -898,6 +898,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu TIntrusivePtr counters) : LogPrefix(TStringBuilder() << "TxId: " << args.TxId << ", task: " << args.TaskId << ". ") , Settings(std::move(settings)) + , MessageSettings(GetWriteActorSettings()) , OutputIndex(args.OutputIndex) , Callbacks(args.Callback) , Counters(counters) @@ -1147,6 +1148,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub TKqpBufferWriteActor( TKqpBufferWriterSettings&& settings) : SessionActorId(settings.SessionActorId) + , MessageSettings(GetWriteActorSettings()) , Alloc(std::make_shared(__LOCATION__)) , TypeEnv(*Alloc) { @@ -1472,9 +1474,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub private: TString LogPrefix; - TWriteActorSettings MessageSettings; - const TActorId SessionActorId; + TWriteActorSettings MessageSettings; ui64 LockTxId = 0; ui64 LockNodeId = 0; @@ -1513,6 +1514,7 @@ class TKqpForwardWriteActor : public TActorBootstrapped, TIntrusivePtr counters) : LogPrefix(TStringBuilder() << "TxId: " << args.TxId << ", task: " << args.TaskId << ". 
") , Settings(std::move(settings)) + , MessageSettings(GetWriteActorSettings()) , OutputIndex(args.OutputIndex) , Callbacks(args.Callback) , Counters(counters) From 2dceb3bcdc8c8a2971682dd6db27f3162ac6061f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 21 Sep 2024 15:49:58 +0300 Subject: [PATCH 12/69] fix --- .../kqp/session_actor/kqp_session_actor.cpp | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index ab5fc22499ca..b220789fa5ef 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -3,6 +3,7 @@ #include "kqp_query_state.h" #include "kqp_query_stats.h" +#include #include #include #include @@ -1554,6 +1555,22 @@ class TKqpSessionActor : public TActorBootstrapped { } } + void Handle(TEvKqpBuffer::TEvError::TPtr& ev) { + const auto& msg = *ev->Get(); + + TString logMsg = TStringBuilder() << "got TEvKqpBuffer::TEvError in " << CurrentStateFuncName(); + LOG_I(logMsg << ", status: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.StatusCode) << " send to: " << ExecuterId); + + TString reason = TStringBuilder() << msg.Message << "; " << msg.SubIssues.ToString(); + + if (ExecuterId) { + auto abortEv = MakeHolder(msg.StatusCode, reason); + Send(ExecuterId, abortEv.Release(), IEventHandle::FlagTrackDelivery); + } else { + ReplyQueryError(NYql::NDq::DqStatusToYdbStatus(msg.StatusCode), logMsg, MessageFromIssues(msg.SubIssues)); + } + } + void CollectSystemViewQueryStats(const TKqpQueryStats* stats, TDuration queryDuration, const TString& database, ui64 requestUnits) { @@ -2070,6 +2087,11 @@ class TKqpSessionActor : public TActorBootstrapped { void Cleanup(bool isFinal = false) { isFinal = isFinal || QueryState && !QueryState->KeepSession; + if (BufferActorId) { + Send(BufferActorId, new TEvKqpBuffer::TEvTerminate{}); + BufferActorId = {}; + } + if (QueryState && 
QueryState->TxCtx) { auto& txCtx = QueryState->TxCtx; if (txCtx->IsInvalidated()) { @@ -2323,6 +2345,7 @@ class TKqpSessionActor : public TActorBootstrapped { hFunc(NWorkload::TEvContinueRequest, HandleNoop); // message from KQP proxy in case of our reply just after kqp proxy timer tick hFunc(NYql::NDq::TEvDq::TEvAbortExecution, HandleNoop); + hFunc(TEvKqpBuffer::TEvError, Handle); hFunc(TEvTxUserProxy::TEvAllocateTxIdResult, HandleNoop); default: @@ -2357,6 +2380,7 @@ class TKqpSessionActor : public TActorBootstrapped { hFunc(TEvKqpExecuter::TEvStreamDataAck, HandleExecute); hFunc(NYql::NDq::TEvDq::TEvAbortExecution, HandleExecute); + hFunc(TEvKqpBuffer::TEvError, Handle); hFunc(TEvKqp::TEvCloseSessionRequest, HandleExecute); hFunc(NGRpcService::TEvClientLost, HandleClientLost); @@ -2404,6 +2428,7 @@ class TKqpSessionActor : public TActorBootstrapped { hFunc(TEvKqp::TEvCompileResponse, HandleNoop); hFunc(TEvKqp::TEvSplitResponse, HandleNoop); hFunc(NYql::NDq::TEvDq::TEvAbortExecution, HandleNoop); + hFunc(TEvKqpBuffer::TEvError, Handle); hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, HandleNoop); hFunc(TEvents::TEvUndelivered, HandleNoop); hFunc(TEvTxUserProxy::TEvAllocateTxIdResult, HandleNoop); @@ -2432,6 +2457,7 @@ class TKqpSessionActor : public TActorBootstrapped { hFunc(TEvents::TEvUndelivered, HandleNoop); hFunc(TEvKqpSnapshot::TEvCreateSnapshotResponse, Handle); hFunc(NWorkload::TEvContinueRequest, HandleNoop); + hFunc(TEvKqpBuffer::TEvError, Handle); } } catch (const yexception& ex) { InternalError(ex.what()); @@ -2578,6 +2604,7 @@ class TKqpSessionActor : public TActorBootstrapped { std::shared_ptr> CompilationCookie; TGUCSettings::TPtr GUCSettings; + TActorId BufferActorId; }; } // namespace From be4131a77c3e006e77609f81ab85410f346aa5fd Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 21 Sep 2024 16:49:17 +0300 Subject: [PATCH 13/69] buffer --- ydb/core/kqp/common/buffer/buffer.h | 15 +++++++++++++++ 
ydb/core/kqp/runtime/kqp_write_actor.cpp | 1 + ydb/core/kqp/runtime/kqp_write_actor.h | 6 ------ ydb/core/kqp/session_actor/kqp_session_actor.cpp | 3 ++- 4 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 ydb/core/kqp/common/buffer/buffer.h diff --git a/ydb/core/kqp/common/buffer/buffer.h b/ydb/core/kqp/common/buffer/buffer.h new file mode 100644 index 000000000000..98a244061411 --- /dev/null +++ b/ydb/core/kqp/common/buffer/buffer.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace NKikimr { +namespace NKqp { + +struct TKqpBufferWriterSettings { + TActorId SessionActorId; +}; + +NActors::IActor* CreateKqpBufferWriterActor(TKqpBufferWriterSettings&& settings); + +} +} diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 23ca59e3fb8c..8457cf139192 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/ydb/core/kqp/runtime/kqp_write_actor.h b/ydb/core/kqp/runtime/kqp_write_actor.h index ad19c2545536..53da7d8cbb8a 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.h +++ b/ydb/core/kqp/runtime/kqp_write_actor.h @@ -10,12 +10,6 @@ namespace NKikimr { namespace NKqp { -struct TKqpBufferWriterSettings { - TActorId SessionActorId; -}; - -NActors::IActor* CreateKqpBufferWriterActor(TKqpBufferWriterSettings&& settings); - void RegisterKqpWriteActor(NYql::NDq::TDqAsyncIoFactory&, TIntrusivePtr); } diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index b220789fa5ef..98776b934c0b 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -3,6 +3,7 @@ #include "kqp_query_state.h" #include "kqp_query_stats.h" +#include #include #include #include @@ -1217,7 +1218,7 @@ class TKqpSessionActor : public TActorBootstrapped { 
request.PerShardKeysSizeLimitBytes = Config->_CommitPerShardKeysSizeLimitBytes.Get().GetRef(); } - if (txCtx.Locks.HasLocks() || txCtx.TopicOperations.HasOperations()) { + if (txCtx.Locks.HasLocks() || txCtx.TopicOperations.HasOperations() || !!BufferActorId) { if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { LOG_D("TExecPhysicalRequest, tx has commit locks"); request.LocksOp = ELocksOp::Commit; From b4c26b7e530290384f94285cbb9865b7c5b9d7e2 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 21 Sep 2024 20:09:29 +0300 Subject: [PATCH 14/69] bufferactorid --- ydb/core/kqp/executer_actor/kqp_data_executer.cpp | 9 ++++++--- ydb/core/kqp/executer_actor/kqp_executer.h | 2 +- ydb/core/kqp/executer_actor/kqp_executer_impl.cpp | 11 +++++++---- ydb/core/kqp/executer_actor/kqp_executer_impl.h | 2 +- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 10 +++++++++- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 6fae68411ba9..008586ac280b 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -129,7 +129,8 @@ class TKqpDataExecuter : public TKqpExecuterBase& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, - const TGUCSettings::TPtr& GUCSettings, const TShardIdToTableInfoPtr& shardIdToTableInfo) + const TGUCSettings::TPtr& GUCSettings, + const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId bufferActorId) : TBase(std::move(request), database, userToken, counters, tableServiceConfig, userRequestContext, statementResultIndex, TWilsonKqp::DataExecuter, "DataExecuter", streamResult) , AsyncIoFactory(std::move(asyncIoFactory)) @@ -138,6 +139,7 @@ class TKqpDataExecuter : public TKqpExecuterBase FederatedQuerySetup; const TGUCSettings::TPtr GUCSettings; TShardIdToTableInfoPtr 
ShardIdToTableInfo; + TActorId BufferActorId; bool HasExternalSources = false; bool SecretSnapshotRequired = false; @@ -2893,11 +2896,11 @@ IActor* CreateKqpDataExecuter(IKqpGateway::TExecPhysicalRequest&& request, const NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const TActorId& creator, const TIntrusivePtr& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, - const TShardIdToTableInfoPtr& shardIdToTableInfo) + const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId bufferActorId) { return new TKqpDataExecuter(std::move(request), database, userToken, counters, streamResult, tableServiceConfig, std::move(asyncIoFactory), creator, userRequestContext, - statementResultIndex, federatedQuerySetup, GUCSettings, shardIdToTableInfo); + statementResultIndex, federatedQuerySetup, GUCSettings, shardIdToTableInfo, bufferActorId); } } // namespace NKqp diff --git a/ydb/core/kqp/executer_actor/kqp_executer.h b/ydb/core/kqp/executer_actor/kqp_executer.h index 7a0fd546eb69..91cb5d436718 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer.h +++ b/ydb/core/kqp/executer_actor/kqp_executer.h @@ -97,7 +97,7 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, TPreparedQueryHolder::TConstPtr preparedQuery, const TActorId& creator, const TIntrusivePtr& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, - const TShardIdToTableInfoPtr& shardIdToTableInfo); + const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId bufferActorId); IActor* CreateKqpSchemeExecuter( TKqpPhyTxHolder::TConstPtr phyTx, NKikimrKqp::EQueryType queryType, const TActorId& target, diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp b/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp index c81fcf313461..9f31733ef463 100644 --- 
a/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp @@ -82,7 +82,7 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt TPreparedQueryHolder::TConstPtr preparedQuery, const TActorId& creator, const TIntrusivePtr& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, - const TShardIdToTableInfoPtr& shardIdToTableInfo) + const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId bufferActorId) { if (request.Transactions.empty()) { // commit-only or rollback-only data transaction @@ -90,7 +90,8 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt std::move(request), database, userToken, counters, false, tableServiceConfig, std::move(asyncIoFactory), creator, userRequestContext, statementResultIndex, - federatedQuerySetup, /*GUCSettings*/nullptr, shardIdToTableInfo + federatedQuerySetup, /*GUCSettings*/nullptr, + shardIdToTableInfo, bufferActorId ); } @@ -113,7 +114,8 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt std::move(request), database, userToken, counters, false, tableServiceConfig, std::move(asyncIoFactory), creator, userRequestContext, statementResultIndex, - federatedQuerySetup, /*GUCSettings*/nullptr, shardIdToTableInfo + federatedQuerySetup, /*GUCSettings*/nullptr, + shardIdToTableInfo, bufferActorId ); case NKqpProto::TKqpPhyTx::TYPE_SCAN: @@ -128,7 +130,8 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt std::move(request), database, userToken, counters, true, tableServiceConfig, std::move(asyncIoFactory), creator, userRequestContext, statementResultIndex, - federatedQuerySetup, GUCSettings, shardIdToTableInfo + federatedQuerySetup, GUCSettings, + shardIdToTableInfo, bufferActorId ); default: diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h 
b/ydb/core/kqp/executer_actor/kqp_executer_impl.h index 68712bf016f4..080fb95d11d4 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h @@ -2065,7 +2065,7 @@ IActor* CreateKqpDataExecuter(IKqpGateway::TExecPhysicalRequest&& request, const NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const TActorId& creator, const TIntrusivePtr& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, - const TShardIdToTableInfoPtr& shardIdToTableInfo); + const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId bufferActorId); IActor* CreateKqpScanExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TString& database, const TIntrusiveConstPtr& userToken, TKqpRequestCounters::TPtr counters, diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 98776b934c0b..70024dcdc796 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1298,12 +1298,20 @@ class TKqpSessionActor : public TActorBootstrapped { request.ResourceManager_ = ResourceManager_; LOG_D("Sending to Executer TraceId: " << request.TraceId.GetTraceId() << " " << request.TraceId.GetSpanIdSize()); + if (Settings.TableService.GetEnableOltpSink() && !BufferActorId && QueryState->TxCtx->TxHasEffects()) { + TKqpBufferWriterSettings settings { + .SessionActorId = SelfId(), + }; + auto* actor = CreateKqpBufferWriterActor(std::move(settings)); + BufferActorId = RegisterWithSameMailbox(actor); + } auto executerActor = CreateKqpExecuter(std::move(request), Settings.Database, QueryState ? QueryState->UserToken : TIntrusiveConstPtr(), RequestCounters, Settings.TableService, AsyncIoFactory, QueryState ? QueryState->PreparedQuery : nullptr, SelfId(), QueryState ? QueryState->UserRequestContext : MakeIntrusive("", Settings.Database, SessionId), - QueryState ? 
QueryState->StatementResultIndex : 0, FederatedQuerySetup, GUCSettings, txCtx->ShardIdToTableInfo); + QueryState ? QueryState->StatementResultIndex : 0, FederatedQuerySetup, GUCSettings, + txCtx->ShardIdToTableInfo, BufferActorId); auto exId = RegisterWithSameMailbox(executerActor); LOG_D("Created new KQP executer: " << exId << " isRollback: " << isRollback); From 2ceb5bd26da287b8965e10088ab280b6eef9dbce Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 21 Sep 2024 21:28:38 +0300 Subject: [PATCH 15/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 70024dcdc796..1f08c50cac3d 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1298,7 +1298,7 @@ class TKqpSessionActor : public TActorBootstrapped { request.ResourceManager_ = ResourceManager_; LOG_D("Sending to Executer TraceId: " << request.TraceId.GetTraceId() << " " << request.TraceId.GetSpanIdSize()); - if (Settings.TableService.GetEnableOltpSink() && !BufferActorId && QueryState->TxCtx->TxHasEffects()) { + if (Settings.TableService.GetEnableOltpSink() && !BufferActorId && txCtx->HasOltpTable && request.AcquireLocksTxId.Defined()) { TKqpBufferWriterSettings settings { .SessionActorId = SelfId(), }; From da19b5b3b5999c6a1317a6c5dbb2576173466326 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 22 Sep 2024 21:07:47 +0300 Subject: [PATCH 16/69] immediate-commit --- ydb/core/kqp/common/buffer/events.h | 16 +++--- ydb/core/kqp/common/kqp_tx.h | 2 + ydb/core/kqp/common/simple/kqp_event_ids.h | 3 +- .../kqp/executer_actor/kqp_data_executer.cpp | 57 ++++++++++++++++--- .../kqp/executer_actor/kqp_executer_impl.h | 6 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 44 ++++++++++---- ydb/core/kqp/runtime/kqp_write_table.cpp | 4 +- 
.../kqp/session_actor/kqp_session_actor.cpp | 23 ++++---- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 2 +- 9 files changed, 114 insertions(+), 43 deletions(-) diff --git a/ydb/core/kqp/common/buffer/events.h b/ydb/core/kqp/common/buffer/events.h index 8dca571446d0..c8e38d10b28c 100644 --- a/ydb/core/kqp/common/buffer/events.h +++ b/ydb/core/kqp/common/buffer/events.h @@ -26,24 +26,22 @@ struct TPreparedInfo { struct TEvKqpBuffer { struct TEvPrepare : public TEventLocal { - TPrepareSettings Settings; -}; - -struct TEvPrepared : public TEventLocal { - TPreparedInfo Result; + TActorId ExecuterActorId; }; struct TEvCommit : public TEventLocal { -}; - -struct TEvCommitted : public TEventLocal { - ui64 ShardId; + TActorId ExecuterActorId; }; struct TEvRollback : public TEventLocal { + TActorId ExecuterActorId; }; struct TEvFlush : public TEventLocal { + TActorId ExecuterActorId; +}; + +struct TEvResult : public TEventLocal { }; struct TEvError : public TEventLocal { diff --git a/ydb/core/kqp/common/kqp_tx.h b/ydb/core/kqp/common/kqp_tx.h index b3aa2106954c..7c58745d766b 100644 --- a/ydb/core/kqp/common/kqp_tx.h +++ b/ydb/core/kqp/common/kqp_tx.h @@ -334,6 +334,8 @@ class TKqpTransactionContext : public NYql::TKikimrTransactionContextBase { bool HasOltpTable = false; bool HasTableWrite = false; + TActorId BufferActorId; + TShardIdToTableInfoPtr ShardIdToTableInfo = std::make_shared(); }; diff --git a/ydb/core/kqp/common/simple/kqp_event_ids.h b/ydb/core/kqp/common/simple/kqp_event_ids.h index da1d3e811198..4fc1b9b87dbd 100644 --- a/ydb/core/kqp/common/simple/kqp_event_ids.h +++ b/ydb/core/kqp/common/simple/kqp_event_ids.h @@ -186,11 +186,10 @@ struct TKqpWorkloadServiceEvents { struct TKqpBufferWriterEvents { enum EKqpBufferWriterEvents { EvPrepare = EventSpaceBegin(TKikimrEvents::ES_KQP) + 800, - EvPrepared, EvCommit, - EvCommitted, EvRollback, EvFlush, + EvResult, EvError, EvTerminate, }; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp 
b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 008586ac280b..a1f57e7ec4f3 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -8,10 +8,11 @@ #include #include -#include #include +#include #include #include +#include #include #include #include @@ -132,14 +133,14 @@ class TKqpDataExecuter : public TKqpExecuterBase(); + event->ExecuterActorId = SelfId(); + Send(BufferActorId, event.release()); + Become(&TKqpDataExecuter::FinalizeState); + } + } + + STATEFN(FinalizeState) { + switch(ev->GetTypeRewrite()) { + hFunc(TEvKqp::TEvAbortExecution, HandleAbortExecution); + hFunc(TEvKqpBuffer::TEvResult, HandleFinalize); + default: + LOG_W("Unexpected event: " << ev->GetTypeName() << ", at state: FinalizeState"); + } + } + + void HandleFinalize(TEvKqpBuffer::TEvResult::TPtr&) { + MakeResponseAndPassAway(); + } + + void MakeResponseAndPassAway() { YQL_ENSURE(!AlreadyReplied); if (LocksBroken) { YQL_ENSURE(ResponseEv->BrokenLockShardId); @@ -259,12 +284,12 @@ class TKqpDataExecuter : public TKqpExecuterBaseSnapshot = GetSnapshot(); - if (!Locks.empty()) { + //if (!Locks.empty()) { if (LockHandle) { ResponseEv->LockHandle = std::move(LockHandle); } - BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks); - } + // BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks); + //} auto resultSize = ResponseEv->GetByteSize(); if (resultSize > (int)ReplySizeLimit) { @@ -331,6 +356,8 @@ class TKqpDataExecuter : public TKqpExecuterBaseOrbit, TxId); + if (BufferActorId && Request.LocksOp == ELocksOp::Commit) { + // TODO: skip resolving phase? Move it to session actor? 
+ YQL_ENSURE(Request.Transactions.empty()); + auto event = std::make_unique(); + event->ExecuterActorId = SelfId(); + Send(BufferActorId, event.release()); + Become(&TKqpDataExecuter::FinalizeState); + return; + } else if (BufferActorId && Request.LocksOp == ELocksOp::Rollback) { + YQL_ENSURE(Request.Transactions.empty()); + auto event = std::make_unique(); + event->ExecuterActorId = SelfId(); + Send(BufferActorId, event.release()); + Become(&TKqpDataExecuter::FinalizeState); + return; + } + size_t sourceScanPartitionsCount = 0; for (ui32 txIdx = 0; txIdx < Request.Transactions.size(); ++txIdx) { auto& tx = Request.Transactions[txIdx]; @@ -2850,7 +2894,6 @@ class TKqpDataExecuter : public TKqpExecuterBase FederatedQuerySetup; const TGUCSettings::TPtr GUCSettings; TShardIdToTableInfoPtr ShardIdToTableInfo; - TActorId BufferActorId; bool HasExternalSources = false; bool SecretSnapshotRequired = false; diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h b/ydb/core/kqp/executer_actor/kqp_executer_impl.h index 080fb95d11d4..43ca939da447 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h @@ -130,8 +130,10 @@ class TKqpExecuterBase : public TActorBootstrapped { TKqpRequestCounters::TPtr counters, const NKikimrConfig::TTableServiceConfig& tableServiceConfig, const TIntrusivePtr& userRequestContext, - ui32 statementResultIndex, ui64 spanVerbosity = 0, TString spanName = "KqpExecuterBase", bool streamResult = false) + ui32 statementResultIndex, ui64 spanVerbosity = 0, TString spanName = "KqpExecuterBase", + bool streamResult = false, const TActorId bufferActorId = {}) : Request(std::move(request)) + , BufferActorId(bufferActorId) , Database(database) , UserToken(userToken) , Counters(counters) @@ -940,6 +942,7 @@ class TKqpExecuterBase : public TActorBootstrapped { settings.SetLockTxId(*lockTxId); settings.SetLockNodeId(SelfId().NodeId()); } + ActorIdToProto(BufferActorId, 
settings.MutableBufferActorId()); output.SinkSettings.ConstructInPlace(); output.SinkSettings->PackFrom(settings); } else { @@ -1996,6 +1999,7 @@ class TKqpExecuterBase : public TActorBootstrapped { protected: IKqpGateway::TExecPhysicalRequest Request; + TActorId BufferActorId; const TString Database; const TIntrusiveConstPtr UserToken; TKqpRequestCounters::TPtr Counters; diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 8457cf139192..0e6bdb9ef619 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -655,10 +655,11 @@ class TKqpTableWriteActor : public TActorBootstrapped { // Mode = EMode::COMMIT; //} - void SetImmediateCommit(ui64 txId) { + void SetImmediateCommit() { YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::IMMEDIATE_COMMIT; - PrepareSettings->TxId = txId; + + // TODO: check only one shard ShardedWriteController->AddCoveringMessages(); } @@ -699,7 +700,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { if (isImmediateCommit) { const auto lock = LocksManager.GetLock(shardId); if (lock) { - evWrite->Record.SetTxId(PrepareSettings->TxId); auto* locks = evWrite->Record.MutableLocks(); locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); locks->AddSendingShards(shardId); @@ -753,7 +753,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } const auto serializationResult = ShardedWriteController->SerializeMessageToPayload(shardId, *evWrite); - YQL_ENSURE(serializationResult.TotalDataSize > 0); + YQL_ENSURE(isPrepare || isImmediateCommit || serializationResult.TotalDataSize > 0); CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", isPrepare=" << isPrepare << ", isImmediateCommit=" << isImmediateCommit << ", TxId=" << evWrite->Record.GetTxId() << ", LockTxId=" << evWrite->Record.GetLockTxId() << ", LockNodeId=" << evWrite->Record.GetLockNodeId() @@ -1153,6 +1153,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub , 
Alloc(std::make_shared(__LOCATION__)) , TypeEnv(*Alloc) { + State = EState::WRITING; Alloc->Release(); } @@ -1167,6 +1168,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub try { switch (ev->GetTypeRewrite()) { hFunc(TEvKqpBuffer::TEvTerminate, Handle); + hFunc(TEvKqpBuffer::TEvFlush, Handle); + hFunc(TEvKqpBuffer::TEvCommit, Handle); hFunc(TEvBufferWrite, Handle); default: AFL_ENSURE(false)("unknown message", ev->GetTypeRewrite()); @@ -1269,6 +1272,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub while (!AckQueue.empty()) { const auto& item = AckQueue.front(); if (GetTotalFreeSpace() >= item.DataSize) { + auto result = std::make_unique(); + result->Token = AckQueue.front().Token; + Send(AckQueue.front().ForwardActorId, result.release()); AckQueue.pop(); } else { return; @@ -1291,7 +1297,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } - if (State == EState::PREPARING) { + /*if (State == EState::PREPARING) { bool isFinished = true; for (auto& [_, info] : WriteInfos) { isFinished &= info.WriteTableActor->IsFinished(); @@ -1299,7 +1305,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub if (isFinished) { OnFinished(); } - } + }*/ if (State == EState::FLUSHING) { bool isEmpty = true; for (auto& [_, info] : WriteInfos) { @@ -1355,11 +1361,11 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub // // TODO: need it? 
//} - void ImmediateCommit(ui64 txId) { + void ImmediateCommit() { YQL_ENSURE(State == EState::WRITING); State = EState::COMMITTING; for (auto& [_, info] : WriteInfos) { - info.WriteTableActor->SetImmediateCommit(txId); + info.WriteTableActor->SetImmediateCommit(); } Close(); Process(); @@ -1428,6 +1434,16 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub PassAway(); } + void Handle(TEvKqpBuffer::TEvFlush::TPtr& ev) { + ExecuterActorId = ev->Get()->ExecuterActorId; + Flush(); + } + + void Handle(TEvKqpBuffer::TEvCommit::TPtr& ev) { + ExecuterActorId = ev->Get()->ExecuterActorId; + ImmediateCommit(); + } + void OnReady() override { Process(); } @@ -1442,7 +1458,12 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void OnCommitted(ui64 shardId, ui64 dataSize) override { AFL_ENSURE(State == EState::COMMITTING); Y_UNUSED(shardId, dataSize); - // TODO: send result + // TODO: check if everything is committed + if (true) { + Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); + ExecuterActorId = {}; + } + // Process(); // Don't need it? 
} void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) override { @@ -1456,7 +1477,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void OnFlushed() { State = EState::WRITING; - // TODO: send ok + Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); + ExecuterActorId = {}; } void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) override { @@ -1478,6 +1500,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub const TActorId SessionActorId; TWriteActorSettings MessageSettings; + TActorId ExecuterActorId; + ui64 LockTxId = 0; ui64 LockNodeId = 0; bool InconsistentTx = false; diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index aed4705f92b2..71e825a59c01 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -869,7 +869,7 @@ struct TBatchWithMetadata { IPayloadSerializer::IBatchPtr Data = nullptr; bool IsCoveringBatch() const { - return Data != nullptr; + return Data == nullptr; } i64 GetMemory() const { @@ -1301,7 +1301,7 @@ class TShardedWriteController : public IShardedWriteController { bool IsEmpty() const override { for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { const auto& writeInfo = WriteInfos.at(token); - if (writeInfo.Serializer && writeInfo.Serializer->IsEmpty()) { + if (writeInfo.Serializer && !writeInfo.Serializer->IsEmpty()) { return false; } } diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 1f08c50cac3d..c5805b2751d4 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -320,6 +320,7 @@ class TKqpSessionActor : public TActorBootstrapped { return; } + // TODO: support buffer actor bool replied = ExecutePhyTx(/*tx*/ nullptr, /*commit*/ true); if (!replied) { 
Become(&TKqpSessionActor::ExecuteState); @@ -1218,7 +1219,7 @@ class TKqpSessionActor : public TActorBootstrapped { request.PerShardKeysSizeLimitBytes = Config->_CommitPerShardKeysSizeLimitBytes.Get().GetRef(); } - if (txCtx.Locks.HasLocks() || txCtx.TopicOperations.HasOperations() || !!BufferActorId) { + if (txCtx.Locks.HasLocks() || txCtx.TopicOperations.HasOperations() || !!txCtx.BufferActorId) { if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { LOG_D("TExecPhysicalRequest, tx has commit locks"); request.LocksOp = ELocksOp::Commit; @@ -1298,12 +1299,12 @@ class TKqpSessionActor : public TActorBootstrapped { request.ResourceManager_ = ResourceManager_; LOG_D("Sending to Executer TraceId: " << request.TraceId.GetTraceId() << " " << request.TraceId.GetSpanIdSize()); - if (Settings.TableService.GetEnableOltpSink() && !BufferActorId && txCtx->HasOltpTable && request.AcquireLocksTxId.Defined()) { + if (Settings.TableService.GetEnableOltpSink() && !txCtx->BufferActorId && txCtx->HasOltpTable && request.AcquireLocksTxId.Defined()) { TKqpBufferWriterSettings settings { .SessionActorId = SelfId(), }; auto* actor = CreateKqpBufferWriterActor(std::move(settings)); - BufferActorId = RegisterWithSameMailbox(actor); + txCtx->BufferActorId = RegisterWithSameMailbox(actor); } auto executerActor = CreateKqpExecuter(std::move(request), Settings.Database, QueryState ? QueryState->UserToken : TIntrusiveConstPtr(), @@ -1311,7 +1312,7 @@ class TKqpSessionActor : public TActorBootstrapped { AsyncIoFactory, QueryState ? QueryState->PreparedQuery : nullptr, SelfId(), QueryState ? QueryState->UserRequestContext : MakeIntrusive("", Settings.Database, SessionId), QueryState ? 
QueryState->StatementResultIndex : 0, FederatedQuerySetup, GUCSettings, - txCtx->ShardIdToTableInfo, BufferActorId); + txCtx->ShardIdToTableInfo, txCtx->BufferActorId); auto exId = RegisterWithSameMailbox(executerActor); LOG_D("Created new KQP executer: " << exId << " isRollback: " << isRollback); @@ -2077,7 +2078,8 @@ class TKqpSessionActor : public TActorBootstrapped { auto dsLock = ExtractLock(lock.GetValueRef(txCtx->Locks.LockType)); request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); } - + + // TODO: support buffer actor SendToExecuter(txCtx, std::move(request), true); } @@ -2086,6 +2088,11 @@ class TKqpSessionActor : public TActorBootstrapped { QueryState->TxCtx->ClearDeferredEffects(); QueryState->TxCtx->Locks.Clear(); QueryState->TxCtx->Finish(); + + if (QueryState->TxCtx->BufferActorId) { + Send(QueryState->TxCtx->BufferActorId, new TEvKqpBuffer::TEvTerminate{}); + QueryState->TxCtx->BufferActorId = {}; + } } } @@ -2096,11 +2103,6 @@ class TKqpSessionActor : public TActorBootstrapped { void Cleanup(bool isFinal = false) { isFinal = isFinal || QueryState && !QueryState->KeepSession; - if (BufferActorId) { - Send(BufferActorId, new TEvKqpBuffer::TEvTerminate{}); - BufferActorId = {}; - } - if (QueryState && QueryState->TxCtx) { auto& txCtx = QueryState->TxCtx; if (txCtx->IsInvalidated()) { @@ -2613,7 +2615,6 @@ class TKqpSessionActor : public TActorBootstrapped { std::shared_ptr> CompilationCookie; TGUCSettings::TPtr GUCSettings; - TActorId BufferActorId; }; } // namespace diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index b80a0d21decb..187371c352d2 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -3706,7 +3706,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto it = client.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard` (Col1, Col2) VALUES (0u, 0); REPLACE INTO `/Root/DataShard` (Col1, Col3) VALUES (1u, 'test'); - )", 
NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } From 32a1cc6d3ad3784a0a9d6f28ec7b0558fbac1cc5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 23 Sep 2024 11:15:56 +0300 Subject: [PATCH 17/69] fix --- ydb/core/kqp/common/kqp_tx.h | 68 +--- ydb/core/kqp/common/kqp_tx_manager.cpp | 293 ++++++++++++++++++ ydb/core/kqp/common/kqp_tx_manager.h | 173 +++++++++++ ydb/core/kqp/common/ya.make | 1 + .../kqp/executer_actor/kqp_data_executer.cpp | 13 +- ydb/core/kqp/executer_actor/kqp_executer.h | 2 +- .../kqp/executer_actor/kqp_executer_impl.cpp | 8 +- .../kqp/executer_actor/kqp_executer_impl.h | 6 +- .../kqp/session_actor/kqp_session_actor.cpp | 6 +- .../kqp/session_actor/kqp_session_actor.h | 1 + 10 files changed, 492 insertions(+), 79 deletions(-) create mode 100644 ydb/core/kqp/common/kqp_tx_manager.cpp create mode 100644 ydb/core/kqp/common/kqp_tx_manager.h diff --git a/ydb/core/kqp/common/kqp_tx.h b/ydb/core/kqp/common/kqp_tx.h index 7c58745d766b..2a6fbd9b2c26 100644 --- a/ydb/core/kqp/common/kqp_tx.h +++ b/ydb/core/kqp/common/kqp_tx.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -13,66 +14,6 @@ namespace NKikimr::NKqp { -class TKqpTxLock { -public: - using TKey = std::tuple; - - TKqpTxLock(const NKikimrMiniKQL::TValue& lockValue) - : LockValue(lockValue) {} - - ui64 GetLockId() const { return LockValue.GetStruct(3).GetUint64(); } - ui64 GetDataShard() const { return LockValue.GetStruct(1).GetUint64(); } - ui64 GetSchemeShard() const { return LockValue.GetStruct(5).GetUint64(); } - ui64 GetPathId() const { return LockValue.GetStruct(4).GetUint64(); } - ui32 GetGeneration() const { return LockValue.GetStruct(2).GetUint32(); } - ui64 GetCounter() const { return LockValue.GetStruct(0).GetUint64(); } - bool HasWrites() const { 
return LockValue.GetStruct(6).GetBool(); } - void SetHasWrites() { - LockValue.MutableStruct(6)->SetBool(true); - } - - TKey GetKey() const { return std::make_tuple(GetLockId(), GetDataShard(), GetSchemeShard(), GetPathId()); } - NKikimrMiniKQL::TValue GetValue() const { return LockValue; } - NYql::NDq::TMkqlValueRef GetValueRef(const NKikimrMiniKQL::TType& type) const { return NYql::NDq::TMkqlValueRef(type, LockValue); } - - bool Invalidated(const TKqpTxLock& newLock) const { - YQL_ENSURE(GetKey() == newLock.GetKey()); - return GetGeneration() != newLock.GetGeneration() || GetCounter() != newLock.GetCounter(); - } - -private: - NKikimrMiniKQL::TValue LockValue; -}; - -struct TKqpTxLocks { - NKikimrMiniKQL::TType LockType; - NKikimrMiniKQL::TListType LocksListType; - THashMap LocksMap; - NLongTxService::TLockHandle LockHandle; - - TMaybe LockIssue; - - bool HasLocks() const { return !LocksMap.empty(); } - bool Broken() const { return LockIssue.Defined(); } - void MarkBroken(NYql::TIssue lockIssue) { LockIssue.ConstructInPlace(std::move(lockIssue)); } - ui64 GetLockTxId() const { return LockHandle ? LockHandle.GetLockId() : HasLocks() ? 
LocksMap.begin()->second.GetLockId() : 0; } - size_t Size() const { return LocksMap.size(); } - - NYql::TIssue GetIssue() { - Y_ENSURE(LockIssue); - return *LockIssue; - } - - void ReportIssues(NYql::TExprContext& ctx) { - if (LockIssue) - ctx.AddError(*LockIssue); - } - - void Clear() { - LocksMap.clear(); - LockIssue.Clear(); - } -}; struct TDeferredEffect { TKqpPhyTxHolder::TConstPtr PhysicalTx; @@ -121,12 +62,6 @@ struct TDeferredEffects { friend class TKqpTransactionContext; }; -struct TTableInfo { - bool IsOlap = false; - THashSet Pathes; -}; - - class TShardIdToTableInfo { public: const TTableInfo& Get(ui64 shardId) const { @@ -335,6 +270,7 @@ class TKqpTransactionContext : public NYql::TKikimrTransactionContextBase { bool HasTableWrite = false; TActorId BufferActorId; + IKqpTransactionManagerPtr TxManager = nullptr; TShardIdToTableInfoPtr ShardIdToTableInfo = std::make_shared(); }; diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp new file mode 100644 index 000000000000..01b7985e40b5 --- /dev/null +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -0,0 +1,293 @@ +#include "kqp_tx_manager.h" + +#include + +namespace NKikimr { +namespace NKqp { + +namespace { + +class TKqpTransactionManager : public IKqpTransactionManager { + enum ETransactionState { + COLLECTING, + PREPARING, + EXECUTING, + }; +public: + void AddShard(ui64 shardId, bool isOlap, const TString& path) override { + AFL_ENSURE(State == ETransactionState::COLLECTING); + ShardsIds.insert(shardId); + auto& shardInfo = ShardsInfo[shardId]; + shardInfo.IsOlap = isOlap; + + const auto [stringsIter, _] = TablePathes.insert(path); + const TStringBuf pathBuf = *stringsIter; + shardInfo.Pathes.insert(pathBuf); + } + + void AddAction(ui64 shardId, ui8 action) override { + AFL_ENSURE(State == ETransactionState::COLLECTING); + ShardsInfo.at(shardId).Flags |= action; + if (action & EAction::WRITE) { + ReadOnly = false; + } + } + + bool AddLock(ui64 shardId, TKqpTxLock 
lock) override { + AFL_ENSURE(State == ETransactionState::COLLECTING); + bool isError = (lock.GetCounter() >= NKikimr::TSysTables::TLocksTable::TLock::ErrorMin); + bool isInvalidated = (lock.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorAlreadyBroken) + || (lock.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorBroken); + bool isLocksAcquireFailure = isError && !isInvalidated; + + auto& shardInfo = ShardsInfo.at(shardId); + if (auto lockPtr = shardInfo.Locks.FindPtr(lock.GetKey()); lockPtr) { + if (lock.HasWrites()) { + lockPtr->Lock.SetHasWrites(); + } + + lockPtr->LocksAcquireFailure |= isLocksAcquireFailure; + if (!lockPtr->LocksAcquireFailure) { + isInvalidated |= lockPtr->Lock.Invalidated(lock); + lockPtr->Invalidated |= isInvalidated; + } + } else { + shardInfo.Locks.emplace( + lock.GetKey(), + TShardInfo::TLockInfo { + .Lock = std::move(lock), + .Invalidated = isInvalidated, + .LocksAcquireFailure = isLocksAcquireFailure, + }); + } + + return !isError && !isInvalidated; + } + + TTableInfo GetShardTableInfo(ui64 shardId) const override { + const auto& info = ShardsInfo.at(shardId); + return TTableInfo{ + .IsOlap = info.IsOlap, + .Pathes = info.Pathes, + }; + } + + EShardState GetState(ui64 shardId) const override { + return ShardsInfo.at(shardId).State; + } + + void SetState(ui64 shardId, EShardState state) override { + ShardsInfo.at(shardId).State = state; + } + + bool IsTxPrepared() const override { + for (const auto& [_, shardInfo] : ShardsInfo) { + if (shardInfo.State != EShardState::PREPARED) { + return false; + } + } + return true; + } + + bool IsTxFinished() const override { + for (const auto& [_, shardInfo] : ShardsInfo) { + if (shardInfo.State != EShardState::FINISHED) { + return false; + } + } + return true; + } + + bool IsReadOnly() const override { + return ReadOnly; + } + + bool IsSingleShard() const override { + return GetShardsCount() == 1; + } + + bool HasSnapshot() const override { + return ValidSnapshot; + } + + 
void SetHasSnapshot(bool hasSnapshot) override { + ValidSnapshot = hasSnapshot; + } + + TCheckLocksResult CheckLocks() const override { + TCheckLocksResult result; + result.Ok = true; + if (HasSnapshot() && IsReadOnly()) { + // Snapshot read doesn't care about locks. + return result; + } + + for (const auto& [_, shardInfo] : ShardsInfo) { + for (const auto& [_, lockInfo] : shardInfo.Locks) { + if (lockInfo.LocksAcquireFailure) { + result.Ok = false; + result.LocksAcquireFailure = lockInfo.LocksAcquireFailure; + } + if (lockInfo.Invalidated) { + result.Ok = false; + result.BrokenLocks.push_back(lockInfo.Lock); + } + } + } + return result; + } + + const THashSet& GetShards() const override { + return ShardsIds; + } + + ui64 GetShardsCount() const override { + return ShardsIds.size(); + } + + void StartPrepare() override { + AFL_ENSURE(State == ETransactionState::COLLECTING); + AFL_ENSURE(!IsReadOnly()); + + for (const auto& [shardId, shardInfo] : ShardsInfo) { + if (shardInfo.Flags & EAction::WRITE) { + ReceivingShards.insert(shardId); + } + if (shardInfo.Flags & EAction::READ) { + SendingShards.insert(shardId); + } + } + + ShardsToWait = ShardsIds; + + MinStep = std::numeric_limits::min(); + MaxStep = std::numeric_limits::max(); + Coordinator = 0; + + State = ETransactionState::PREPARING; + } + + TPrepareInfo GetPrepareTransactionInfo(ui64 shardId) override { + AFL_ENSURE(State == ETransactionState::PREPARING); + auto& shardInfo = ShardsInfo.at(shardId); + AFL_ENSURE(shardInfo.State == EShardState::PROCESSING); + shardInfo.State = EShardState::PREPARING; + + TPrepareInfo result { + .SendingShards = SendingShards, + .ReceivingShards = ReceivingShards, + .Arbiter = std::nullopt, + .Locks = {}, + }; + + for (const auto& [_, lockInfo] : shardInfo.Locks) { + result.Locks.push_back(lockInfo.Lock); + } + + return result; + } + + bool ConsumePrepareTransactionResult(TPrepareResult&& result) override { + AFL_ENSURE(State == ETransactionState::PREPARING); + auto& shardInfo = 
ShardsInfo.at(result.ShardId); + AFL_ENSURE(shardInfo.State == EShardState::PREPARING); + shardInfo.State = EShardState::PREPARED; + + ShardsToWait.erase(result.ShardId); + + MinStep = std::max(MinStep, result.MinStep); + MaxStep = std::min(MaxStep, result.MaxStep); + + if (result.Coordinator && !Coordinator) { + Coordinator = result.Coordinator; + } + + AFL_ENSURE(Coordinator && Coordinator == result.Coordinator)("prev_coordinator", Coordinator)("new_coordinator", result.Coordinator); + + return ShardsToWait.empty(); + } + + void StartExecuting() override { + AFL_ENSURE(State == ETransactionState::PREPARING + || (State == ETransactionState::COLLECTING + && IsSingleShard())); + AFL_ENSURE(!IsReadOnly()); + State = ETransactionState::EXECUTING; + + ShardsToWait = ShardsIds; + } + + TCommitInfo GetCommitInfo() override { + AFL_ENSURE(State == ETransactionState::EXECUTING); + TCommitInfo result; + result.MinStep = MinStep; + result.MaxStep = MaxStep; + result.Coordinator = Coordinator; + + for (auto& [shardId, shardInfo] : ShardsInfo) { + result.ShardsInfo.push_back(TCommitShardInfo{ + .ShardId = shardId, + .AffectedFlags = shardInfo.Flags, + }); + + AFL_ENSURE(shardInfo.State == EShardState::PREPARED || shardInfo.State == EShardState::PROCESSING); + shardInfo.State = EShardState::EXECUTING; + } + return result; + } + + bool ConsumeCommitResult(ui64 shardId) override { + AFL_ENSURE(State == ETransactionState::EXECUTING); + auto& shardInfo = ShardsInfo.at(shardId); + AFL_ENSURE(shardInfo.State == EShardState::EXECUTING); + shardInfo.State = EShardState::FINISHED; + + ShardsToWait.erase(shardId); + + return ShardsToWait.empty(); + } + +private: + ETransactionState State = ETransactionState::COLLECTING; + + struct TShardInfo { + EShardState State = EShardState::PROCESSING; + TActionFlags Flags = 0; + + struct TLockInfo { + TKqpTxLock Lock; + bool Invalidated = false; + bool LocksAcquireFailure = false; + }; + + THashMap Locks; + + bool IsOlap = false; + THashSet Pathes; 
+ }; + + THashSet ShardsIds; + THashMap ShardsInfo; + std::unordered_set TablePathes; + + bool ReadOnly = true; + bool ValidSnapshot = false; + + THashSet SendingShards; + THashSet ReceivingShards; + + THashSet ShardsToWait; + + ui64 MinStep = 0; + ui64 MaxStep = 0; + ui64 Coordinator = 0; +}; + +} + +IKqpTransactionManagerPtr CreateKqpTransactionManager() { + return std::make_shared(); +} + +} +} diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h new file mode 100644 index 000000000000..60cd07da03a6 --- /dev/null +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -0,0 +1,173 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace NKikimr { +namespace NKqp { + +class TKqpTxLock { +public: + using TKey = std::tuple; + + TKqpTxLock(const NKikimrMiniKQL::TValue& lockValue) + : LockValue(lockValue) {} + + ui64 GetLockId() const { return LockValue.GetStruct(3).GetUint64(); } + ui64 GetDataShard() const { return LockValue.GetStruct(1).GetUint64(); } + ui64 GetSchemeShard() const { return LockValue.GetStruct(5).GetUint64(); } + ui64 GetPathId() const { return LockValue.GetStruct(4).GetUint64(); } + ui32 GetGeneration() const { return LockValue.GetStruct(2).GetUint32(); } + ui64 GetCounter() const { return LockValue.GetStruct(0).GetUint64(); } + bool HasWrites() const { return LockValue.GetStruct(6).GetBool(); } + void SetHasWrites() { + LockValue.MutableStruct(6)->SetBool(true); + } + + TKey GetKey() const { return std::make_tuple(GetLockId(), GetDataShard(), GetSchemeShard(), GetPathId()); } + NKikimrMiniKQL::TValue GetValue() const { return LockValue; } + NYql::NDq::TMkqlValueRef GetValueRef(const NKikimrMiniKQL::TType& type) const { return NYql::NDq::TMkqlValueRef(type, LockValue); } + + bool Invalidated(const TKqpTxLock& newLock) const { + YQL_ENSURE(GetKey() == newLock.GetKey()); + return GetGeneration() != newLock.GetGeneration() || GetCounter() != newLock.GetCounter(); + } + +private: + 
NKikimrMiniKQL::TValue LockValue; +}; + +struct TKqpTxLocks { + NKikimrMiniKQL::TType LockType; + NKikimrMiniKQL::TListType LocksListType; + THashMap LocksMap; + NLongTxService::TLockHandle LockHandle; + + TMaybe LockIssue; + + bool HasLocks() const { return !LocksMap.empty(); } + bool Broken() const { return LockIssue.Defined(); } + void MarkBroken(NYql::TIssue lockIssue) { LockIssue.ConstructInPlace(std::move(lockIssue)); } + ui64 GetLockTxId() const { return LockHandle ? LockHandle.GetLockId() : HasLocks() ? LocksMap.begin()->second.GetLockId() : 0; } + size_t Size() const { return LocksMap.size(); } + + NYql::TIssue GetIssue() { + Y_ENSURE(LockIssue); + return *LockIssue; + } + + void ReportIssues(NYql::TExprContext& ctx) { + if (LockIssue) + ctx.AddError(*LockIssue); + } + + void Clear() { + LocksMap.clear(); + LockIssue.Clear(); + } +}; + +struct TTableInfo { + bool IsOlap = false; + THashSet Pathes; +}; + +class IKqpTransactionManager { +public: + virtual ~IKqpTransactionManager() = default; + + enum EShardState { + PROCESSING, + PREPARING, + PREPARED, + EXECUTING, + FINISHED + }; + + enum EAction { + READ = 1, + WRITE = 2, + }; + + using TActionFlags = ui8; + + virtual void AddShard(ui64 shardId, bool isOlap, const TString& path) = 0; + virtual void AddAction(ui64 shardId, ui8 action) = 0; + virtual bool AddLock(ui64 shardId, TKqpTxLock lock) = 0; + + virtual TTableInfo GetShardTableInfo(ui64 shardId) const = 0; + + virtual EShardState GetState(ui64 shardId) const = 0; + virtual void SetState(ui64 shardId, EShardState state) = 0; + + virtual bool IsTxPrepared() const = 0; + virtual bool IsTxFinished() const = 0; + + virtual bool IsReadOnly() const = 0; + virtual bool IsSingleShard() const = 0; + + virtual bool HasSnapshot() const = 0; + virtual void SetHasSnapshot(bool hasSnapshot) = 0; + + struct TCheckLocksResult { + bool Ok = false; + std::vector BrokenLocks; + bool LocksAcquireFailure = false; + }; + virtual TCheckLocksResult CheckLocks() const = 0; + + 
virtual const THashSet& GetShards() const = 0; + virtual ui64 GetShardsCount() const = 0; + + virtual void StartPrepare() = 0; + + struct TPrepareInfo { + const THashSet& SendingShards; + const THashSet& ReceivingShards; + std::optional Arbiter; // TODO: support volatile + std::optional ArbiterColumnShard; // TODO: support columnshard&topic + TVector Locks; + }; + + virtual TPrepareInfo GetPrepareTransactionInfo(ui64 shardId) = 0; + + struct TPrepareResult { + ui64 ShardId; + ui64 MinStep; + ui64 MaxStep; + ui64 Coordinator; + }; + + virtual bool ConsumePrepareTransactionResult(TPrepareResult&& result) = 0; + + virtual void StartExecuting() = 0; + + struct TCommitShardInfo { + ui64 ShardId; + ui32 AffectedFlags; + }; + + struct TCommitInfo { + ui64 MinStep; + ui64 MaxStep; + ui64 Coordinator; + + TVector ShardsInfo; + }; + + virtual TCommitInfo GetCommitInfo() = 0; + + virtual bool ConsumeCommitResult(ui64 shardId) = 0; +}; + +using IKqpTransactionManagerPtr = std::shared_ptr; + +IKqpTransactionManagerPtr CreateKqpTransactionManager(); + +} +} diff --git a/ydb/core/kqp/common/ya.make b/ydb/core/kqp/common/ya.make index 0559e96e994c..0a8050d7f7e4 100644 --- a/ydb/core/kqp/common/ya.make +++ b/ydb/core/kqp/common/ya.make @@ -11,6 +11,7 @@ SRCS( kqp_script_executions.cpp kqp_timeouts.cpp kqp_timeouts.h + kqp_tx_manager.cpp kqp_tx.cpp kqp_types.cpp kqp_types.h diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index a1f57e7ec4f3..a0fcf76d961f 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -131,10 +132,12 @@ class TKqpDataExecuter : public TKqpExecuterBase& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, - const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId 
bufferActorId) + const TShardIdToTableInfoPtr& shardIdToTableInfo, + const IKqpTransactionManagerPtr& txManager, + const TActorId bufferActorId) : TBase(std::move(request), database, userToken, counters, tableServiceConfig, userRequestContext, statementResultIndex, TWilsonKqp::DataExecuter, - "DataExecuter", streamResult, bufferActorId) + "DataExecuter", streamResult, bufferActorId, txManager) , AsyncIoFactory(std::move(asyncIoFactory)) , UseEvWriteForOltp(tableServiceConfig.GetEnableOltpSink()) , FederatedQuerySetup(federatedQuerySetup) @@ -2939,11 +2942,11 @@ IActor* CreateKqpDataExecuter(IKqpGateway::TExecPhysicalRequest&& request, const NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const TActorId& creator, const TIntrusivePtr& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, - const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId bufferActorId) + const TShardIdToTableInfoPtr& shardIdToTableInfo, const IKqpTransactionManagerPtr& txManager, const TActorId bufferActorId) { return new TKqpDataExecuter(std::move(request), database, userToken, counters, streamResult, tableServiceConfig, - std::move(asyncIoFactory), creator, userRequestContext, - statementResultIndex, federatedQuerySetup, GUCSettings, shardIdToTableInfo, bufferActorId); + std::move(asyncIoFactory), creator, userRequestContext, statementResultIndex, federatedQuerySetup, GUCSettings, + shardIdToTableInfo, txManager, bufferActorId); } } // namespace NKqp diff --git a/ydb/core/kqp/executer_actor/kqp_executer.h b/ydb/core/kqp/executer_actor/kqp_executer.h index 91cb5d436718..6bf6d794cf51 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer.h +++ b/ydb/core/kqp/executer_actor/kqp_executer.h @@ -97,7 +97,7 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, TPreparedQueryHolder::TConstPtr preparedQuery, const TActorId& creator, 
const TIntrusivePtr& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, - const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId bufferActorId); + const TShardIdToTableInfoPtr& shardIdToTableInfo, const IKqpTransactionManagerPtr& txManager, const TActorId bufferActorId); IActor* CreateKqpSchemeExecuter( TKqpPhyTxHolder::TConstPtr phyTx, NKikimrKqp::EQueryType queryType, const TActorId& target, diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp b/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp index 9f31733ef463..71be8e351bec 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp @@ -82,7 +82,7 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt TPreparedQueryHolder::TConstPtr preparedQuery, const TActorId& creator, const TIntrusivePtr& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, - const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId bufferActorId) + const TShardIdToTableInfoPtr& shardIdToTableInfo, const IKqpTransactionManagerPtr& txManager, const TActorId bufferActorId) { if (request.Transactions.empty()) { // commit-only or rollback-only data transaction @@ -91,7 +91,7 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt std::move(asyncIoFactory), creator, userRequestContext, statementResultIndex, federatedQuerySetup, /*GUCSettings*/nullptr, - shardIdToTableInfo, bufferActorId + shardIdToTableInfo, txManager, bufferActorId ); } @@ -115,7 +115,7 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt std::move(asyncIoFactory), creator, userRequestContext, statementResultIndex, federatedQuerySetup, /*GUCSettings*/nullptr, - shardIdToTableInfo, bufferActorId + shardIdToTableInfo, txManager, bufferActorId ); case 
NKqpProto::TKqpPhyTx::TYPE_SCAN: @@ -131,7 +131,7 @@ IActor* CreateKqpExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TSt tableServiceConfig, std::move(asyncIoFactory), creator, userRequestContext, statementResultIndex, federatedQuerySetup, GUCSettings, - shardIdToTableInfo, bufferActorId + shardIdToTableInfo, txManager, bufferActorId ); default: diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h b/ydb/core/kqp/executer_actor/kqp_executer_impl.h index 43ca939da447..fd368f6cb63d 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h @@ -131,9 +131,10 @@ class TKqpExecuterBase : public TActorBootstrapped { const NKikimrConfig::TTableServiceConfig& tableServiceConfig, const TIntrusivePtr& userRequestContext, ui32 statementResultIndex, ui64 spanVerbosity = 0, TString spanName = "KqpExecuterBase", - bool streamResult = false, const TActorId bufferActorId = {}) + bool streamResult = false, const TActorId bufferActorId = {}, const IKqpTransactionManagerPtr& txManager = nullptr) : Request(std::move(request)) , BufferActorId(bufferActorId) + , TxManager(txManager) , Database(database) , UserToken(userToken) , Counters(counters) @@ -2000,6 +2001,7 @@ class TKqpExecuterBase : public TActorBootstrapped { protected: IKqpGateway::TExecPhysicalRequest Request; TActorId BufferActorId; + IKqpTransactionManagerPtr TxManager; const TString Database; const TIntrusiveConstPtr UserToken; TKqpRequestCounters::TPtr Counters; @@ -2069,7 +2071,7 @@ IActor* CreateKqpDataExecuter(IKqpGateway::TExecPhysicalRequest&& request, const NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const TActorId& creator, const TIntrusivePtr& userRequestContext, ui32 statementResultIndex, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, - const TShardIdToTableInfoPtr& shardIdToTableInfo, const TActorId bufferActorId); + const TShardIdToTableInfoPtr& shardIdToTableInfo, const 
IKqpTransactionManagerPtr& txManager, const TActorId bufferActorId); IActor* CreateKqpScanExecuter(IKqpGateway::TExecPhysicalRequest&& request, const TString& database, const TIntrusiveConstPtr& userToken, TKqpRequestCounters::TPtr counters, diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index c5805b2751d4..3f2de2ad0c33 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1306,13 +1306,16 @@ class TKqpSessionActor : public TActorBootstrapped { auto* actor = CreateKqpBufferWriterActor(std::move(settings)); txCtx->BufferActorId = RegisterWithSameMailbox(actor); } + if (Settings.TableService.GetEnableOltpSink() && request.AcquireLocksTxId.Defined()) { + txCtx->TxManager = CreateKqpTransactionManager(); + } auto executerActor = CreateKqpExecuter(std::move(request), Settings.Database, QueryState ? QueryState->UserToken : TIntrusiveConstPtr(), RequestCounters, Settings.TableService, AsyncIoFactory, QueryState ? QueryState->PreparedQuery : nullptr, SelfId(), QueryState ? QueryState->UserRequestContext : MakeIntrusive("", Settings.Database, SessionId), QueryState ? 
QueryState->StatementResultIndex : 0, FederatedQuerySetup, GUCSettings, - txCtx->ShardIdToTableInfo, txCtx->BufferActorId); + txCtx->ShardIdToTableInfo, txCtx->TxManager, txCtx->BufferActorId); auto exId = RegisterWithSameMailbox(executerActor); LOG_D("Created new KQP executer: " << exId << " isRollback: " << isRollback); @@ -2089,6 +2092,7 @@ class TKqpSessionActor : public TActorBootstrapped { QueryState->TxCtx->Locks.Clear(); QueryState->TxCtx->Finish(); + QueryState->TxCtx->TxManager = nullptr; if (QueryState->TxCtx->BufferActorId) { Send(QueryState->TxCtx->BufferActorId, new TEvKqpBuffer::TEvTerminate{}); QueryState->TxCtx->BufferActorId = {}; diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.h b/ydb/core/kqp/session_actor/kqp_session_actor.h index f26fff2b00ca..50ad957d2453 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.h +++ b/ydb/core/kqp/session_actor/kqp_session_actor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include From b884efb1e6472649cc8bea0c73bdc549bd9c103f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 23 Sep 2024 14:15:53 +0300 Subject: [PATCH 18/69] txmanager --- ydb/core/kqp/common/kqp_tx_manager.cpp | 43 +++++++++++++++- ydb/core/kqp/common/kqp_tx_manager.h | 10 +++- .../kqp/executer_actor/kqp_data_executer.cpp | 29 +++++++++-- .../kqp/session_actor/kqp_session_actor.cpp | 50 +++++++++++-------- 4 files changed, 103 insertions(+), 29 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 01b7985e40b5..27b2eebb53b2 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -39,6 +39,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { bool isInvalidated = (lock.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorAlreadyBroken) || (lock.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorBroken); bool isLocksAcquireFailure = isError && !isInvalidated; + bool 
broken = false; auto& shardInfo = ShardsInfo.at(shardId); if (auto lockPtr = shardInfo.Locks.FindPtr(lock.GetKey()); lockPtr) { @@ -51,6 +52,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { isInvalidated |= lockPtr->Lock.Invalidated(lock); lockPtr->Invalidated |= isInvalidated; } + broken = lockPtr->Invalidated || lockPtr->LocksAcquireFailure; } else { shardInfo.Locks.emplace( lock.GetKey(), @@ -59,9 +61,33 @@ class TKqpTransactionManager : public IKqpTransactionManager { .Invalidated = isInvalidated, .LocksAcquireFailure = isLocksAcquireFailure, }); + broken = isInvalidated || isLocksAcquireFailure; } - return !isError && !isInvalidated; + if (broken && !LocksIssue) { + const auto& lockInfo = shardInfo.Locks.at(lock.GetKey()); + if (lockInfo.LocksAcquireFailure) { + LocksIssue = YqlIssue(NYql::TPosition(), NYql::TIssuesIds::KIKIMR_LOCKS_ACQUIRE_FAILURE); + return false; + } else if (lockInfo.Invalidated) { + TStringBuilder message; + message << "Transaction locks invalidated. 
Tables: "; + bool first = true; + // TODO: add error by lock key (pathid) + for (const auto& path : shardInfo.Pathes) { + if (!first) { + message << ", "; + first = false; + } + message << "`" << path << "`"; + } + LocksIssue = YqlIssue(NYql::TPosition(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message); + return false; + } + YQL_ENSURE(false); + } + + return true; } TTableInfo GetShardTableInfo(ui64 shardId) const override { @@ -106,6 +132,10 @@ class TKqpTransactionManager : public IKqpTransactionManager { return GetShardsCount() == 1; } + bool IsEmpty() const override { + return GetShardsCount() == 0; + } + bool HasSnapshot() const override { return ValidSnapshot; } @@ -114,7 +144,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { ValidSnapshot = hasSnapshot; } - TCheckLocksResult CheckLocks() const override { + /*TCheckLocksResult CheckLocks() const override { TCheckLocksResult result; result.Ok = true; if (HasSnapshot() && IsReadOnly()) { @@ -135,6 +165,14 @@ class TKqpTransactionManager : public IKqpTransactionManager { } } return result; + }*/ + + bool BrokenLocks() const override { + return LocksIssue.has_value() && !(HasSnapshot() && IsReadOnly()); + } + + const std::optional& GetLockIssue() const override { + return LocksIssue; } const THashSet& GetShards() const override { @@ -272,6 +310,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { bool ReadOnly = true; bool ValidSnapshot = false; + std::optional LocksIssue; THashSet SendingShards; THashSet ReceivingShards; diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index 60cd07da03a6..6a203ad3acd5 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -96,6 +96,9 @@ class IKqpTransactionManager { using TActionFlags = ui8; + // TODO: ??? 
+ // virutal std::optional GetLockTxId() const = 0; + virtual void AddShard(ui64 shardId, bool isOlap, const TString& path) = 0; virtual void AddAction(ui64 shardId, ui8 action) = 0; virtual bool AddLock(ui64 shardId, TKqpTxLock lock) = 0; @@ -110,16 +113,19 @@ class IKqpTransactionManager { virtual bool IsReadOnly() const = 0; virtual bool IsSingleShard() const = 0; + virtual bool IsEmpty() const = 0; virtual bool HasSnapshot() const = 0; virtual void SetHasSnapshot(bool hasSnapshot) = 0; - struct TCheckLocksResult { + /*struct TCheckLocksResult { bool Ok = false; std::vector BrokenLocks; bool LocksAcquireFailure = false; }; - virtual TCheckLocksResult CheckLocks() const = 0; + virtual TCheckLocksResult CheckLocks() const = 0;*/ + virtual bool BrokenLocks() const = 0; + virtual const std::optional& GetLockIssue() const = 0; virtual const THashSet& GetShards() const = 0; virtual ui64 GetShardsCount() const = 0; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index a0fcf76d961f..59516595abab 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -147,6 +147,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseAdd(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); + + if (TxManager) { + TxManager->AddShard(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); + TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::READ); + //TxManager->AddLock(lock.GetDataShard(), lock); + } } } else if (data.GetData().template Is()) { NKikimrKqp::TEvKqpOutputActorResultInfo info; YQL_ENSURE(data.GetData().UnpackTo(&info), "Failed to unpack settings"); for (auto& lock : info.GetLocks()) { - Locks.push_back(lock); + if (!TxManager) { + Locks.push_back(lock); + } const auto& task = TasksGraph.GetTask(taskId); const auto& stageInfo = 
TasksGraph.GetStageInfo(task.StageId); ShardIdToTableInfo->Add(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); + if (TxManager) { + TxManager->AddShard(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); + TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::WRITE); + // ??? TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::READ); + //TxManager->AddLock(lock.GetDataShard(), lock); + } } } }; @@ -287,12 +304,12 @@ class TKqpDataExecuter : public TKqpExecuterBaseSnapshot = GetSnapshot(); - //if (!Locks.empty()) { + if (!Locks.empty() || (TxManager && !TxManager->IsEmpty()) || true) { if (LockHandle) { ResponseEv->LockHandle = std::move(LockHandle); } - // BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks); - //} + BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks); + } auto resultSize = ResponseEv->GetByteSize(); if (resultSize > (int)ReplySizeLimit) { @@ -1229,6 +1246,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseGet(); ResponseEv->Orbit.Join(res->Orbit); const ui64 shardId = res->GetOrigin(); @@ -2264,6 +2282,7 @@ class TKqpDataExecuter : public TKqpExecuterBase { bool CheckTransactionLocks(const TKqpPhyTxHolder::TConstPtr& tx) { auto& txCtx = *QueryState->TxCtx; - if (!txCtx.DeferredEffects.Empty() && txCtx.Locks.Broken()) { + const bool broken = txCtx.TxManager ? !!txCtx.TxManager->GetLockIssue() : txCtx.Locks.Broken(); + + if (!txCtx.DeferredEffects.Empty() && broken) { ReplyQueryError(Ydb::StatusIds::ABORTED, "tx has deferred effects, but locks are broken", - MessageFromIssues(std::vector{txCtx.Locks.GetIssue()})); + MessageFromIssues(std::vector{txCtx.TxManager ? 
*txCtx.TxManager->GetLockIssue() : txCtx.Locks.GetIssue()})); return false; } - if (tx && tx->GetHasEffects() && txCtx.Locks.Broken()) { + if (tx && tx->GetHasEffects() && broken) { ReplyQueryError(Ydb::StatusIds::ABORTED, "tx has effects, but locks are broken", - MessageFromIssues(std::vector{txCtx.Locks.GetIssue()})); - return false; + MessageFromIssues(std::vector{txCtx.TxManager ? *txCtx.TxManager->GetLockIssue() : txCtx.Locks.GetIssue()})); } return true; @@ -1155,7 +1156,8 @@ class TKqpSessionActor : public TActorBootstrapped { bool literal = tx && tx->IsLiteralTx(); if (commit) { - if (txCtx.TxHasEffects() || txCtx.Locks.HasLocks() || txCtx.TopicOperations.HasOperations()) { + const bool hasLocks = txCtx.TxManager ? !txCtx.TxManager->IsEmpty() : txCtx.Locks.HasLocks(); + if (txCtx.TxHasEffects() || hasLocks || txCtx.TopicOperations.HasOperations()) { // Cannot perform commit in literal execution literal = false; } else if (!tx) { @@ -1219,7 +1221,8 @@ class TKqpSessionActor : public TActorBootstrapped { request.PerShardKeysSizeLimitBytes = Config->_CommitPerShardKeysSizeLimitBytes.Get().GetRef(); } - if (txCtx.Locks.HasLocks() || txCtx.TopicOperations.HasOperations() || !!txCtx.BufferActorId) { + const bool hasLocks = txCtx.TxManager ? 
!txCtx.TxManager->IsEmpty() : txCtx.Locks.HasLocks(); + if (hasLocks || txCtx.TopicOperations.HasOperations() || !!txCtx.BufferActorId) { if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { LOG_D("TExecPhysicalRequest, tx has commit locks"); request.LocksOp = ELocksOp::Commit; @@ -1228,11 +1231,14 @@ class TKqpSessionActor : public TActorBootstrapped { request.LocksOp = ELocksOp::Rollback; } - for (auto& [lockId, lock] : txCtx.Locks.LocksMap) { - auto dsLock = ExtractLock(lock.GetValueRef(txCtx.Locks.LockType)); - request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); + if (!txCtx.TxManager) { + for (auto& [lockId, lock] : txCtx.Locks.LocksMap) { + auto dsLock = ExtractLock(lock.GetValueRef(txCtx.Locks.LockType)); + request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); + } + } else { + // TODO: support for non buffer actor writes } - } request.TopicOperations = std::move(txCtx.TopicOperations); @@ -1248,7 +1254,7 @@ class TKqpSessionActor : public TActorBootstrapped { QueryState->Orbit, QueryState->CurrentTx, request.Transactions.size(), - txCtx.Locks.Size(), + (txCtx.TxManager ? 
txCtx.TxManager->GetShardsCount() : txCtx.Locks.Size()), request.AcquireLocksTxId.Defined()); SendToExecuter(QueryState->TxCtx.Get(), std::move(request)); @@ -1472,10 +1478,12 @@ class TKqpSessionActor : public TActorBootstrapped { // Invalidate query cache on scheme/internal errors switch (status) { case Ydb::StatusIds::ABORTED: { - if (ev->BrokenLockPathId) { + if (ev->BrokenLockPathId && !QueryState->TxCtx->TxManager) { issues.AddIssue(GetLocksInvalidatedIssue(*QueryState->TxCtx, *ev->BrokenLockPathId)); - } else if (ev->BrokenLockShardId) { + } else if (ev->BrokenLockShardId && !QueryState->TxCtx->TxManager) { issues.AddIssue(GetLocksInvalidatedIssue(*QueryState->TxCtx->ShardIdToTableInfo, *ev->BrokenLockShardId)); + } else if (QueryState->TxCtx->TxManager && QueryState->TxCtx->TxManager->BrokenLocks()) { + issues.AddIssue(*QueryState->TxCtx->TxManager->GetLockIssue()); } break; } @@ -1522,7 +1530,7 @@ class TKqpSessionActor : public TActorBootstrapped { QueryState->TxCtx->Locks.LockHandle = std::move(ev->LockHandle); } - if (!MergeLocksWithTxResult(executerResults)) { + if (!QueryState->TxCtx->TxManager && !MergeLocksWithTxResult(executerResults)) { return; } @@ -2077,12 +2085,14 @@ class TKqpSessionActor : public TActorBootstrapped { request.LocksOp = ELocksOp::Rollback; // Should tx with empty LocksMap be aborted? 
- for (auto& [lockId, lock] : txCtx->Locks.LocksMap) { - auto dsLock = ExtractLock(lock.GetValueRef(txCtx->Locks.LockType)); - request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); + if (!txCtx->TxManager) { + for (auto& [lockId, lock] : txCtx->Locks.LocksMap) { + auto dsLock = ExtractLock(lock.GetValueRef(txCtx->Locks.LockType)); + request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); + } + } else { + // TODO: support buffer actor } - - // TODO: support buffer actor SendToExecuter(txCtx, std::move(request), true); } From e038215c537d3372286944d491798ac414ea1a95 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 23 Sep 2024 16:00:11 +0300 Subject: [PATCH 19/69] fix --- ydb/core/kqp/common/buffer/buffer.h | 2 + .../kqp/executer_actor/kqp_data_executer.cpp | 6 +-- ydb/core/kqp/runtime/kqp_write_actor.cpp | 54 ++++++++++++++----- ydb/core/kqp/runtime/kqp_write_actor.h | 2 +- ydb/core/kqp/runtime/kqp_write_table.cpp | 35 +++++++----- ydb/core/kqp/runtime/kqp_write_table.h | 5 +- .../kqp/session_actor/kqp_session_actor.cpp | 9 ++-- 7 files changed, 76 insertions(+), 37 deletions(-) diff --git a/ydb/core/kqp/common/buffer/buffer.h b/ydb/core/kqp/common/buffer/buffer.h index 98a244061411..75a31e2e0505 100644 --- a/ydb/core/kqp/common/buffer/buffer.h +++ b/ydb/core/kqp/common/buffer/buffer.h @@ -1,12 +1,14 @@ #pragma once #include +#include namespace NKikimr { namespace NKqp { struct TKqpBufferWriterSettings { TActorId SessionActorId; + IKqpTransactionManagerPtr TxManager; }; NActors::IActor* CreateKqpBufferWriterActor(TKqpBufferWriterSettings&& settings); diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 59516595abab..e1bbaffc78fb 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -278,10 +278,8 @@ class TKqpDataExecuter : public TKqpExecuterBaseAdd(lock.GetDataShard(), stageInfo.Meta.TableKind == 
ETableKind::Olap, stageInfo.Meta.TablePath); if (TxManager) { - TxManager->AddShard(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); - TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::WRITE); - // ??? TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::READ); - //TxManager->AddLock(lock.GetDataShard(), lock); + YQL_ENSURE(stageInfo.Meta.TableKind == ETableKind::Olap); + // TODO: } } } diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 0e6bdb9ef619..c0e237b92ddb 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -8,10 +8,12 @@ #include #include #include +#include +#include #include +#include #include #include -#include #include #include #include @@ -21,7 +23,6 @@ #include #include #include -#include namespace { @@ -109,7 +110,7 @@ struct IKqpTableWriterCallbacks { // EvWrite statuses virtual void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) = 0; virtual void OnCommitted(ui64 shardId, ui64 dataSize) = 0; - virtual void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) = 0; + virtual void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) = 0; virtual void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) = 0; }; @@ -205,6 +206,15 @@ class TKqpTableWriteActor : public TActorBootstrapped { return ShardedWriteController->IsEmpty(); } + bool IsOlap() const { + YQL_ENSURE(SchemeEntry); + return SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable; + } + + const TString& GetTablePath() const { + return TablePath; + } + const THashMap& GetLocks() const { return LocksManager.GetLocks(); } @@ -612,6 +622,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void 
ProcessWriteCompletedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { + YQL_ENSURE(SchemeEntry); CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() << ", TabletId=" << ev->Get()->Record.GetOrigin() << ", Cookie=" << ev->Cookie @@ -638,7 +649,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { if (result && (Mode == EMode::COMMIT || Mode == EMode::IMMEDIATE_COMMIT)) { Callbacks->OnCommitted(ev->Get()->Record.GetOrigin(), result->DataSize); } else if (result) { - Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), result->DataSize, result->IsShardEmpty); + Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), SchemeEntry->TableId, result->DataSize, result->HasRead); } } @@ -832,12 +843,11 @@ class TKqpTableWriteActor : public TActorBootstrapped { try { if (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) { - ShardedWriteController->OnPartitioningChanged(std::move(*SchemeEntry)); + ShardedWriteController->OnPartitioningChanged(*SchemeEntry); } else { - ShardedWriteController->OnPartitioningChanged(std::move(*SchemeEntry), std::move(*SchemeRequest)); + ShardedWriteController->OnPartitioningChanged(*SchemeEntry, std::move(*SchemeRequest)); + SchemeRequest.reset(); } - SchemeEntry.reset(); - SchemeRequest.reset(); } catch (...) { RuntimeError( CurrentExceptionMessage(), @@ -1026,6 +1036,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu void PassAway() override { WriteTableActor->Terminate(); + //TODO: wait for writer actors? 
TActorBootstrapped::PassAway(); } @@ -1047,8 +1058,8 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu AFL_ENSURE(false); } - void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) override { - Y_UNUSED(shardId, isShardEmpty); + void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) override { + Y_UNUSED(shardId, tableId, hasRead); EgressStats.Bytes += dataSize; EgressStats.Chunks++; EgressStats.Splits++; @@ -1150,6 +1161,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub TKqpBufferWriterSettings&& settings) : SessionActorId(settings.SessionActorId) , MessageSettings(GetWriteActorSettings()) + , TxManager(settings.TxManager) , Alloc(std::make_shared(__LOCATION__)) , TypeEnv(*Alloc) { @@ -1317,7 +1329,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } - THashMap GetLocks(TWriteToken token) const { + /*THashMap GetLocks(TWriteToken token) const { auto& info = WriteInfos.at(token.TableId); THashMap result; for (const auto& [shardId, lockInfo] : info.WriteTableActor->GetLocks()) { @@ -1338,7 +1350,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } return result; - } + }*/ void Flush() { YQL_ENSURE(State == EState::WRITING); @@ -1466,8 +1478,21 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub // Process(); // Don't need it? 
} - void OnMessageAcknowledged(ui64 shardId, ui64 dataSize, bool isShardEmpty) override { - Y_UNUSED(shardId, dataSize, isShardEmpty); + void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) override { + Y_UNUSED(dataSize); + auto& info = WriteInfos.at(tableId); + const auto& lockInfo = info.WriteTableActor->GetLocks().at(shardId); + + const auto lock = lockInfo.GetLock(); + YQL_ENSURE(lock); + YQL_ENSURE(shardId == lock->GetDataShard()); + TxManager->AddShard(shardId, info.WriteTableActor->IsOlap(), info.WriteTableActor->GetTablePath()); + TxManager->AddAction(shardId, IKqpTransactionManager::EAction::WRITE); + if (hasRead) { + TxManager->AddAction(shardId, IKqpTransactionManager::EAction::READ); + } + //TxManager->AddLock(lock->GetDataShard(), lock); + Process(); } @@ -1501,6 +1526,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub TWriteActorSettings MessageSettings; TActorId ExecuterActorId; + IKqpTransactionManagerPtr TxManager; ui64 LockTxId = 0; ui64 LockNodeId = 0; diff --git a/ydb/core/kqp/runtime/kqp_write_actor.h b/ydb/core/kqp/runtime/kqp_write_actor.h index 53da7d8cbb8a..f87062db2289 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.h +++ b/ydb/core/kqp/runtime/kqp_write_actor.h @@ -1,11 +1,11 @@ #pragma once +#include #include #include #include #include #include -#include namespace NKikimr { namespace NKqp { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 71e825a59c01..7892683145de 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -867,6 +867,7 @@ struct TMetadata { struct TBatchWithMetadata { IShardedWriteController::TWriteToken Token = std::numeric_limits::max(); IPayloadSerializer::IBatchPtr Data = nullptr; + bool HasRead = false; bool IsCoveringBatch() const { return Data == nullptr; @@ -921,11 +922,16 @@ class TShardsInfo { return Batches.at(index); } - std::optional PopBatches(const ui64 
cookie) { + struct TBatchInfo { + ui64 DataSize = 0; + bool HasRead = false; + }; + std::optional PopBatches(const ui64 cookie) { if (BatchesInFlight != 0 && Cookie == cookie) { - ui64 dataSize = 0; + TBatchInfo result; for (size_t index = 0; index < BatchesInFlight; ++index) { - dataSize += Batches.front().GetMemory(); + result.DataSize += Batches.front().GetMemory(); + result.HasRead = Batches.front().HasRead; Batches.pop_front(); } @@ -933,8 +939,8 @@ class TShardsInfo { SendAttempts = 0; BatchesInFlight = 0; - Memory -= dataSize; - return dataSize; + Memory -= result.DataSize; + return result; } return std::nullopt; } @@ -1051,8 +1057,8 @@ class TShardsInfo { class TShardedWriteController : public IShardedWriteController { public: - void OnPartitioningChanged(NSchemeCache::TSchemeCacheNavigate::TEntry&& schemeEntry) override { - SchemeEntry = std::move(schemeEntry); + void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) override { + SchemeEntry = schemeEntry; BeforePartitioningChanged(); for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { auto& writeInfo = WriteInfos.at(token); @@ -1064,9 +1070,9 @@ class TShardedWriteController : public IShardedWriteController { } void OnPartitioningChanged( - NSchemeCache::TSchemeCacheNavigate::TEntry&& schemeEntry, + const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) override { - SchemeEntry = std::move(schemeEntry); + SchemeEntry = schemeEntry; PartitionsEntry = std::move(partitionsEntry); BeforePartitioningChanged(); for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { @@ -1230,11 +1236,12 @@ class TShardedWriteController : public IShardedWriteController { std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { auto allocGuard = TypeEnv.BindAllocator(); auto& shardInfo = ShardsInfo.GetShard(shardId); - const auto removedDataSize = shardInfo.PopBatches(cookie); - if 
(removedDataSize) { + const auto result = shardInfo.PopBatches(cookie); + if (result) { return TMessageAcknowledgedResult { - .DataSize = *removedDataSize, + .DataSize = result->DataSize, .IsShardEmpty = shardInfo.IsEmpty(), + .HasRead = result->HasRead, }; } return std::nullopt; @@ -1339,6 +1346,8 @@ class TShardedWriteController : public IShardedWriteController { ShardsInfo.GetShard(shardId).PushBatch(TBatchWithMetadata{ .Token = token, .Data = std::move(batch), + .HasRead = (writeInfo.Metadata.OperationType != NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE + && writeInfo.Metadata.OperationType != NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT), }); } } @@ -1354,6 +1363,8 @@ class TShardedWriteController : public IShardedWriteController { shard.PushBatch(TBatchWithMetadata{ .Token = token, .Data = std::move(batch), + .HasRead = (writeInfo.Metadata.OperationType != NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE + && writeInfo.Metadata.OperationType != NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT), }); } } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 9b434163ca26..d71f21b20b7f 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -13,9 +13,9 @@ namespace NKqp { class IShardedWriteController : public TThrRefBase { public: virtual void OnPartitioningChanged( - NSchemeCache::TSchemeCacheNavigate::TEntry&& schemeEntry) = 0; + const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) = 0; virtual void OnPartitioningChanged( - NSchemeCache::TSchemeCacheNavigate::TEntry&& schemeEntry, + const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) = 0; using TWriteToken = ui64; @@ -57,6 +57,7 @@ class IShardedWriteController : public TThrRefBase { struct TMessageAcknowledgedResult { ui64 DataSize = 0; bool IsShardEmpty = 0; + bool HasRead = false; }; virtual 
std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index c26da0de0fff..6e54b401fb8e 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1305,16 +1305,17 @@ class TKqpSessionActor : public TActorBootstrapped { request.ResourceManager_ = ResourceManager_; LOG_D("Sending to Executer TraceId: " << request.TraceId.GetTraceId() << " " << request.TraceId.GetSpanIdSize()); + if (Settings.TableService.GetEnableOltpSink() && request.AcquireLocksTxId.Defined()) { + txCtx->TxManager = CreateKqpTransactionManager(); + } if (Settings.TableService.GetEnableOltpSink() && !txCtx->BufferActorId && txCtx->HasOltpTable && request.AcquireLocksTxId.Defined()) { TKqpBufferWriterSettings settings { .SessionActorId = SelfId(), + .TxManager = txCtx->TxManager, }; auto* actor = CreateKqpBufferWriterActor(std::move(settings)); txCtx->BufferActorId = RegisterWithSameMailbox(actor); } - if (Settings.TableService.GetEnableOltpSink() && request.AcquireLocksTxId.Defined()) { - txCtx->TxManager = CreateKqpTransactionManager(); - } auto executerActor = CreateKqpExecuter(std::move(request), Settings.Database, QueryState ? 
QueryState->UserToken : TIntrusiveConstPtr(), RequestCounters, Settings.TableService, @@ -1580,7 +1581,7 @@ class TKqpSessionActor : public TActorBootstrapped { const auto& msg = *ev->Get(); TString logMsg = TStringBuilder() << "got TEvKqpBuffer::TEvError in " << CurrentStateFuncName(); - LOG_I(logMsg << ", status: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.StatusCode) << " send to: " << ExecuterId); + LOG_W(logMsg << ", status: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.StatusCode) << " send to: " << ExecuterId); TString reason = TStringBuilder() << msg.Message << "; " << msg.SubIssues.ToString(); From 06d6be3e86ed284f721b962587b6ce3b71c8fc1c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 23 Sep 2024 16:30:57 +0300 Subject: [PATCH 20/69] fix --- ydb/core/kqp/common/kqp_tx.h | 60 +++++++++++++++++ ydb/core/kqp/common/kqp_tx_manager.cpp | 16 ++--- ydb/core/kqp/common/kqp_tx_manager.h | 64 +++---------------- .../kqp/executer_actor/kqp_data_executer.cpp | 2 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- 5 files changed, 79 insertions(+), 65 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx.h b/ydb/core/kqp/common/kqp_tx.h index 2a6fbd9b2c26..b4a4b6f9accb 100644 --- a/ydb/core/kqp/common/kqp_tx.h +++ b/ydb/core/kqp/common/kqp_tx.h @@ -14,6 +14,66 @@ namespace NKikimr::NKqp { +class TKqpTxLock { +public: + using TKey = std::tuple; + + TKqpTxLock(const NKikimrMiniKQL::TValue& lockValue) + : LockValue(lockValue) {} + + ui64 GetLockId() const { return LockValue.GetStruct(3).GetUint64(); } + ui64 GetDataShard() const { return LockValue.GetStruct(1).GetUint64(); } + ui64 GetSchemeShard() const { return LockValue.GetStruct(5).GetUint64(); } + ui64 GetPathId() const { return LockValue.GetStruct(4).GetUint64(); } + ui32 GetGeneration() const { return LockValue.GetStruct(2).GetUint32(); } + ui64 GetCounter() const { return LockValue.GetStruct(0).GetUint64(); } + bool HasWrites() const { return LockValue.GetStruct(6).GetBool(); } + void 
SetHasWrites() { + LockValue.MutableStruct(6)->SetBool(true); + } + + TKey GetKey() const { return std::make_tuple(GetLockId(), GetDataShard(), GetSchemeShard(), GetPathId()); } + NKikimrMiniKQL::TValue GetValue() const { return LockValue; } + NYql::NDq::TMkqlValueRef GetValueRef(const NKikimrMiniKQL::TType& type) const { return NYql::NDq::TMkqlValueRef(type, LockValue); } + + bool Invalidated(const TKqpTxLock& newLock) const { + YQL_ENSURE(GetKey() == newLock.GetKey()); + return GetGeneration() != newLock.GetGeneration() || GetCounter() != newLock.GetCounter(); + } + +private: + NKikimrMiniKQL::TValue LockValue; +}; + +struct TKqpTxLocks { + NKikimrMiniKQL::TType LockType; + NKikimrMiniKQL::TListType LocksListType; + THashMap LocksMap; + NLongTxService::TLockHandle LockHandle; + + TMaybe LockIssue; + + bool HasLocks() const { return !LocksMap.empty(); } + bool Broken() const { return LockIssue.Defined(); } + void MarkBroken(NYql::TIssue lockIssue) { LockIssue.ConstructInPlace(std::move(lockIssue)); } + ui64 GetLockTxId() const { return LockHandle ? LockHandle.GetLockId() : HasLocks() ? 
LocksMap.begin()->second.GetLockId() : 0; } + size_t Size() const { return LocksMap.size(); } + + NYql::TIssue GetIssue() { + Y_ENSURE(LockIssue); + return *LockIssue; + } + + void ReportIssues(NYql::TExprContext& ctx) { + if (LockIssue) + ctx.AddError(*LockIssue); + } + + void Clear() { + LocksMap.clear(); + LockIssue.Clear(); + } +}; struct TDeferredEffect { TKqpPhyTxHolder::TConstPtr PhysicalTx; diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 27b2eebb53b2..1738c89fae0a 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -33,18 +33,18 @@ class TKqpTransactionManager : public IKqpTransactionManager { } } - bool AddLock(ui64 shardId, TKqpTxLock lock) override { + bool AddLock(ui64 shardId, TKqpLock lock) override { AFL_ENSURE(State == ETransactionState::COLLECTING); - bool isError = (lock.GetCounter() >= NKikimr::TSysTables::TLocksTable::TLock::ErrorMin); - bool isInvalidated = (lock.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorAlreadyBroken) - || (lock.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorBroken); + bool isError = (lock.Proto.GetCounter() >= NKikimr::TSysTables::TLocksTable::TLock::ErrorMin); + bool isInvalidated = (lock.Proto.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorAlreadyBroken) + || (lock.Proto.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorBroken); bool isLocksAcquireFailure = isError && !isInvalidated; bool broken = false; auto& shardInfo = ShardsInfo.at(shardId); if (auto lockPtr = shardInfo.Locks.FindPtr(lock.GetKey()); lockPtr) { - if (lock.HasWrites()) { - lockPtr->Lock.SetHasWrites(); + if (lock.Proto.GetHasWrites()) { + lockPtr->Lock.Proto.SetHasWrites(true); } lockPtr->LocksAcquireFailure |= isLocksAcquireFailure; @@ -293,12 +293,12 @@ class TKqpTransactionManager : public IKqpTransactionManager { TActionFlags Flags = 0; struct TLockInfo { - TKqpTxLock Lock; + TKqpLock Lock; 
bool Invalidated = false; bool LocksAcquireFailure = false; }; - THashMap Locks; + THashMap Locks; bool IsOlap = false; THashSet Pathes; diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index 6a203ad3acd5..9458d24bd6f4 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -11,65 +11,19 @@ namespace NKikimr { namespace NKqp { -class TKqpTxLock { -public: +struct TKqpLock { using TKey = std::tuple; + TKey GetKey() const { return std::make_tuple(Proto.GetLockId(), Proto.GetDataShard(), Proto.GetSchemeShard(), Proto.GetPathId()); } - TKqpTxLock(const NKikimrMiniKQL::TValue& lockValue) - : LockValue(lockValue) {} - - ui64 GetLockId() const { return LockValue.GetStruct(3).GetUint64(); } - ui64 GetDataShard() const { return LockValue.GetStruct(1).GetUint64(); } - ui64 GetSchemeShard() const { return LockValue.GetStruct(5).GetUint64(); } - ui64 GetPathId() const { return LockValue.GetStruct(4).GetUint64(); } - ui32 GetGeneration() const { return LockValue.GetStruct(2).GetUint32(); } - ui64 GetCounter() const { return LockValue.GetStruct(0).GetUint64(); } - bool HasWrites() const { return LockValue.GetStruct(6).GetBool(); } - void SetHasWrites() { - LockValue.MutableStruct(6)->SetBool(true); - } - - TKey GetKey() const { return std::make_tuple(GetLockId(), GetDataShard(), GetSchemeShard(), GetPathId()); } - NKikimrMiniKQL::TValue GetValue() const { return LockValue; } - NYql::NDq::TMkqlValueRef GetValueRef(const NKikimrMiniKQL::TType& type) const { return NYql::NDq::TMkqlValueRef(type, LockValue); } - - bool Invalidated(const TKqpTxLock& newLock) const { + bool Invalidated(const TKqpLock& newLock) const { YQL_ENSURE(GetKey() == newLock.GetKey()); - return GetGeneration() != newLock.GetGeneration() || GetCounter() != newLock.GetCounter(); + return Proto.GetGeneration() != newLock.Proto.GetGeneration() || Proto.GetCounter() != newLock.Proto.GetCounter(); } -private: - NKikimrMiniKQL::TValue 
LockValue; -}; - -struct TKqpTxLocks { - NKikimrMiniKQL::TType LockType; - NKikimrMiniKQL::TListType LocksListType; - THashMap LocksMap; - NLongTxService::TLockHandle LockHandle; - - TMaybe LockIssue; + TKqpLock(const NKikimrDataEvents::TLock& proto) + : Proto(proto) {} - bool HasLocks() const { return !LocksMap.empty(); } - bool Broken() const { return LockIssue.Defined(); } - void MarkBroken(NYql::TIssue lockIssue) { LockIssue.ConstructInPlace(std::move(lockIssue)); } - ui64 GetLockTxId() const { return LockHandle ? LockHandle.GetLockId() : HasLocks() ? LocksMap.begin()->second.GetLockId() : 0; } - size_t Size() const { return LocksMap.size(); } - - NYql::TIssue GetIssue() { - Y_ENSURE(LockIssue); - return *LockIssue; - } - - void ReportIssues(NYql::TExprContext& ctx) { - if (LockIssue) - ctx.AddError(*LockIssue); - } - - void Clear() { - LocksMap.clear(); - LockIssue.Clear(); - } + NKikimrDataEvents::TLock Proto; }; struct TTableInfo { @@ -101,7 +55,7 @@ class IKqpTransactionManager { virtual void AddShard(ui64 shardId, bool isOlap, const TString& path) = 0; virtual void AddAction(ui64 shardId, ui8 action) = 0; - virtual bool AddLock(ui64 shardId, TKqpTxLock lock) = 0; + virtual bool AddLock(ui64 shardId, TKqpLock lock) = 0; virtual TTableInfo GetShardTableInfo(ui64 shardId) const = 0; @@ -137,7 +91,7 @@ class IKqpTransactionManager { const THashSet& ReceivingShards; std::optional Arbiter; // TODO: support volatile std::optional ArbiterColumnShard; // TODO: support columnshard&topic - TVector Locks; + TVector Locks; }; virtual TPrepareInfo GetPrepareTransactionInfo(ui64 shardId) = 0; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index e1bbaffc78fb..eb003f07808e 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -263,7 +263,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseAddShard(lock.GetDataShard(), 
stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::READ); - //TxManager->AddLock(lock.GetDataShard(), lock); + TxManager->AddLock(lock.GetDataShard(), lock); } } } else if (data.GetData().template Is()) { diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index c0e237b92ddb..00ff0ab06cca 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -1491,7 +1491,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub if (hasRead) { TxManager->AddAction(shardId, IKqpTransactionManager::EAction::READ); } - //TxManager->AddLock(lock->GetDataShard(), lock); + TxManager->AddLock(lock->GetDataShard(), *lock); Process(); } From 1031e3f676ae73ae7ac0e46fa3b815ea8380acf9 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 23 Sep 2024 17:58:15 +0300 Subject: [PATCH 21/69] fix --- ydb/core/kqp/common/kqp_tx_manager.cpp | 7 +++++++ .../kqp/executer_actor/kqp_data_executer.cpp | 2 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 16 ++++++++-------- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 2 ++ 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 1738c89fae0a..794463fbc579 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -252,6 +252,13 @@ class TKqpTransactionManager : public IKqpTransactionManager { AFL_ENSURE(!IsReadOnly()); State = ETransactionState::EXECUTING; + for (auto& [_, shardInfo] : ShardsInfo) { + AFL_ENSURE(shardInfo.State == EShardState::PREPARED + || (shardInfo.State == EShardState::PROCESSING + && IsSingleShard())); + shardInfo.State = EShardState::EXECUTING; + } + ShardsToWait = ShardsIds; } diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 
eb003f07808e..a06ed94d77cb 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -302,7 +302,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseSnapshot = GetSnapshot(); - if (!Locks.empty() || (TxManager && !TxManager->IsEmpty()) || true) { + if (!Locks.empty() || (TxManager && !TxManager->IsEmpty())) { if (LockHandle) { ResponseEv->LockHandle = std::move(LockHandle); } diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 00ff0ab06cca..fa4f6b6a91ae 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -1368,11 +1368,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Process(); } - //void OnCommit() { - // YQL_ENSURE(State == EState::PREPARING); - // // TODO: need it? - //} - void ImmediateCommit() { YQL_ENSURE(State == EState::WRITING); State = EState::COMMITTING; @@ -1453,7 +1448,13 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void Handle(TEvKqpBuffer::TEvCommit::TPtr& ev) { ExecuterActorId = ev->Get()->ExecuterActorId; - ImmediateCommit(); + YQL_ENSURE(!TxManager->IsReadOnly()); + if (TxManager->IsSingleShard()) { + TxManager->StartExecuting(); + ImmediateCommit(); + } else { + //Prepare(); + } } void OnReady() override { @@ -1470,8 +1471,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void OnCommitted(ui64 shardId, ui64 dataSize) override { AFL_ENSURE(State == EState::COMMITTING); Y_UNUSED(shardId, dataSize); - // TODO: check if everything is committed - if (true) { + if (TxManager->ConsumeCommitResult(shardId)) { Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); ExecuterActorId = {}; } diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 6e54b401fb8e..6b25135fbf85 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp 
@@ -2242,6 +2242,7 @@ class TKqpSessionActor : public TActorBootstrapped { } void EndCleanup(bool isFinal) { + Cerr << "EndCleanup, isFinal: " << isFinal << Endl; LOG_D("EndCleanup, isFinal: " << isFinal); if (QueryResponse) @@ -2318,6 +2319,7 @@ class TKqpSessionActor : public TActorBootstrapped { FillTxInfo(response); ExecuterId = TActorId{}; + Cerr << "HERE" << Endl; Cleanup(IsFatalError(ydbStatus)); } From 329219651290bdaaf44a97ead27eee2ce4f8a3b4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 24 Sep 2024 10:17:07 +0300 Subject: [PATCH 22/69] fix --- ydb/core/kqp/common/buffer/events.h | 9 +-- ydb/core/kqp/common/kqp_tx_manager.cpp | 2 +- ydb/core/kqp/common/kqp_tx_manager.h | 2 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 97 +++++++++++++++++++----- 4 files changed, 82 insertions(+), 28 deletions(-) diff --git a/ydb/core/kqp/common/buffer/events.h b/ydb/core/kqp/common/buffer/events.h index c8e38d10b28c..86246666e747 100644 --- a/ydb/core/kqp/common/buffer/events.h +++ b/ydb/core/kqp/common/buffer/events.h @@ -8,14 +8,6 @@ namespace NKikimr { namespace NKqp { -struct TPrepareSettings { - ui64 TxId; - THashSet SendingShards; - THashSet ReceivingShards; - std::optional ArbiterShard; - std::optional ArbiterColumnShard; -}; - struct TPreparedInfo { ui64 ShardId; ui64 MinStep; @@ -31,6 +23,7 @@ struct TEvPrepare : public TEventLocal { TActorId ExecuterActorId; + ui64 TxId; }; struct TEvRollback : public TEventLocal { diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 794463fbc579..4feb917f5b7c 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -245,7 +245,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { return ShardsToWait.empty(); } - void StartExecuting() override { + void StartExecute() override { AFL_ENSURE(State == ETransactionState::PREPARING || (State == ETransactionState::COLLECTING && IsSingleShard())); diff --git 
a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index 9458d24bd6f4..fef63afe667c 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -105,7 +105,7 @@ class IKqpTransactionManager { virtual bool ConsumePrepareTransactionResult(TPrepareResult&& result) = 0; - virtual void StartExecuting() = 0; + virtual void StartExecute() = 0; struct TCommitShardInfo { ui64 ShardId; diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index fa4f6b6a91ae..fc9ea22fada1 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -115,6 +115,57 @@ struct IKqpTableWriterCallbacks { virtual void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) = 0; }; +/*class TKqpReadOnlyCommitActor : public TActorBootstrapped { + using TBase = TActorBootstrapped; + + struct TEvPrivate { + enum EEv { + EvTerminate, + }; + + struct TEvTerminate : public TEventLocal { + }; + }; + +public: + TKqpReadOnlyCommitActor( + IKqpTableWriterCallbacks* callbacks, + IKqpTransactionManagerPtr txManager) { + } + + void Bootstrap() { + LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; + Become(&TKqpReadOnlyCommitActor::StatePreparing); + } + + static constexpr char ActorName[] = "KQP_TABLE_WRITE_ACTOR"; + + STFUNC(StateProcessing) { + try { + switch (ev->GetTypeRewrite()) { + } + } catch (const yexception& e) { + RuntimeError(e.what(), NYql::NDqProto::StatusIds::INTERNAL_ERROR); + } + } + + void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { + Callbacks->OnError(message, statusCode, subIssues); + } + + void PassAway() override {; + Send(PipeCacheId, new TEvPipeCache::TEvUnlink(0)); + TActorBootstrapped::PassAway(); + } + + void Terminate() { + Send(this->SelfId(), new 
TEvPrivate::TEvTerminate{}); + } + +private: + IKqpTransactionManagerPtr TxManager; +};*/ + class TKqpTableWriteActor : public TActorBootstrapped { using TBase = TActorBootstrapped; @@ -156,7 +207,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { const ui64 lockNodeId, const bool inconsistentTx, const NMiniKQL::TTypeEnvironment& typeEnv, - std::shared_ptr alloc) + std::shared_ptr alloc, + const IKqpTransactionManagerPtr& txManager) : TypeEnv(typeEnv) , Alloc(alloc) , TableId(tableId) @@ -165,7 +217,9 @@ class TKqpTableWriteActor : public TActorBootstrapped { , LockNodeId(lockNodeId) , InconsistentTx(inconsistentTx) , Callbacks(callbacks) + // , TxManager(txManager) { + Y_UNUSED(txManager); try { ShardedWriteController = CreateShardedWriteController( TShardedWriteControllerSettings { @@ -646,17 +700,17 @@ class TKqpTableWriteActor : public TActorBootstrapped { const auto result = ShardedWriteController->OnMessageAcknowledged( ev->Get()->Record.GetOrigin(), ev->Cookie); - if (result && (Mode == EMode::COMMIT || Mode == EMode::IMMEDIATE_COMMIT)) { + if (result && result->IsShardEmpty && (Mode == EMode::COMMIT || Mode == EMode::IMMEDIATE_COMMIT)) { Callbacks->OnCommitted(ev->Get()->Record.GetOrigin(), result->DataSize); } else if (result) { Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), SchemeEntry->TableId, result->DataSize, result->HasRead); } } - void SetPrepare(const std::shared_ptr& prepareSettings) { + void SetPrepare() { YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::PREPARE; - PrepareSettings = prepareSettings; + //PrepareSettings = prepareSettings; ShardedWriteController->AddCoveringMessages(); } @@ -670,7 +724,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::IMMEDIATE_COMMIT; - // TODO: check only one shard + YQL_ENSURE(ShardedWriteController->GetShardsCount() == 1); ShardedWriteController->AddCoveringMessages(); } @@ -720,7 +774,7 @@ class TKqpTableWriteActor : public 
TActorBootstrapped { } } } else if (isPrepare) { - evWrite->Record.SetTxId(PrepareSettings->TxId); + /*evWrite->Record.SetTxId(PrepareSettings->TxId); auto* locks = evWrite->Record.MutableLocks(); locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); @@ -752,12 +806,12 @@ class TKqpTableWriteActor : public TActorBootstrapped { if (PrepareSettings->ReceivingShards.contains(shardId)) { locks->AddReceivingShards(shardId); } - } + }*/ // TODO: multi locks (for tablestore support) const auto lock = LocksManager.GetLock(shardId); if (lock) { - *locks->AddLocks() = *lock; + //*locks->AddLocks() = *lock; } } else if (!InconsistentTx) { evWrite->SetLockId(LockTxId, LockNodeId); @@ -873,7 +927,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { NActors::TActorId PipeCacheId = NKikimr::MakePipePerNodeCacheID(false); TString LogPrefix; - TWriteActorSettings MessageSettings; // TODO: fill it + TWriteActorSettings MessageSettings; const NMiniKQL::TTypeEnvironment& TypeEnv; std::shared_ptr Alloc; @@ -891,10 +945,11 @@ class TKqpTableWriteActor : public TActorBootstrapped { ui64 ResolveAttempts = 0; TLocksManager LocksManager; + //IKqpTransactionManagerPtr TxManager; bool Closed = false; EMode Mode = EMode::WRITE; - std::shared_ptr PrepareSettings; + //std::shared_ptr PrepareSettings; IShardedWriteControllerPtr ShardedWriteController = nullptr; }; @@ -935,7 +990,8 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu Settings.GetLockNodeId(), Settings.GetInconsistentTx(), TypeEnv, - Alloc); + Alloc, + nullptr); WriteTableActorId = RegisterWithSameMailbox(WriteTableActor); @@ -1216,7 +1272,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub LockNodeId, InconsistentTx, TypeEnv, - Alloc); + Alloc, + TxManager); writeInfo.WriteTableActorId = RegisterWithSameMailbox(writeInfo.WriteTableActor); } @@ -1358,12 +1415,15 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Process(); } - void Prepare(const std::shared_ptr& prepareSettings) { + void 
Prepare(const ui64 txId) { YQL_ENSURE(State == EState::WRITING); State = EState::PREPARING; - for (auto& [_, info] : WriteInfos) { - info.WriteTableActor->SetPrepare(prepareSettings); - } + Y_UNUSED(txId); + // TODO: Additional actor for reads commit + //for (auto& [_, info] : WriteInfos) { + //info.WriteTableActor->SetPrepare(prepareSettings); + //GetShardsIds() + //} Close(); Process(); } @@ -1450,10 +1510,11 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub ExecuterActorId = ev->Get()->ExecuterActorId; YQL_ENSURE(!TxManager->IsReadOnly()); if (TxManager->IsSingleShard()) { - TxManager->StartExecuting(); + TxManager->StartExecute(); ImmediateCommit(); } else { - //Prepare(); + TxManager->StartPrepare(); + Prepare(ev->Get()->TxId); } } From d73263204b37f07ef4956b3336f1f6b6d9981923 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 24 Sep 2024 12:34:25 +0300 Subject: [PATCH 23/69] fix --- ydb/core/kqp/common/kqp_tx_manager.cpp | 57 +++++++-- ydb/core/kqp/common/kqp_tx_manager.h | 28 +---- ydb/core/kqp/runtime/kqp_write_actor.cpp | 141 ++++++++--------------- ydb/core/kqp/runtime/kqp_write_table.cpp | 23 ++-- ydb/core/kqp/runtime/kqp_write_table.h | 7 +- 5 files changed, 118 insertions(+), 138 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 4feb917f5b7c..10bd9bec3e04 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -7,6 +7,21 @@ namespace NKqp { namespace { +struct TKqpLock { + using TKey = std::tuple; + TKey GetKey() const { return std::make_tuple(Proto.GetLockId(), Proto.GetDataShard(), Proto.GetSchemeShard(), Proto.GetPathId()); } + + bool Invalidated(const TKqpLock& newLock) const { + YQL_ENSURE(GetKey() == newLock.GetKey()); + return Proto.GetGeneration() != newLock.Proto.GetGeneration() || Proto.GetCounter() != newLock.Proto.GetCounter(); + } + + TKqpLock(const NKikimrDataEvents::TLock& proto) + : Proto(proto) {} + + 
NKikimrDataEvents::TLock Proto; +}; + class TKqpTransactionManager : public IKqpTransactionManager { enum ETransactionState { COLLECTING, @@ -14,6 +29,9 @@ class TKqpTransactionManager : public IKqpTransactionManager { EXECUTING, }; public: + TKqpTransactionManager(bool collectOnly) + : CollectOnly(collectOnly) {} + void AddShard(ui64 shardId, bool isOlap, const TString& path) override { AFL_ENSURE(State == ETransactionState::COLLECTING); ShardsIds.insert(shardId); @@ -33,7 +51,8 @@ class TKqpTransactionManager : public IKqpTransactionManager { } } - bool AddLock(ui64 shardId, TKqpLock lock) override { + bool AddLock(ui64 shardId, const NKikimrDataEvents::TLock& lockProto) override { + TKqpLock lock(lockProto); AFL_ENSURE(State == ETransactionState::COLLECTING); bool isError = (lock.Proto.GetCounter() >= NKikimr::TSysTables::TLocksTable::TLock::ErrorMin); bool isInvalidated = (lock.Proto.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorAlreadyBroken) @@ -106,6 +125,25 @@ class TKqpTransactionManager : public IKqpTransactionManager { ShardsInfo.at(shardId).State = state; } + TVector GetLocks() const override { + TVector locks; + for (const auto& [_, shardInfo] : ShardsInfo) { + for (const auto& [_, lockInfo] : shardInfo.Locks) { + locks.push_back(lockInfo.Lock.Proto); + } + } + return locks; + } + + TVector GetLocks(ui64 shardId) const override { + TVector locks; + const auto& shardInfo = ShardsInfo.at(shardId); + for (const auto& [_, lockInfo] : shardInfo.Locks) { + locks.push_back(lockInfo.Lock.Proto); + } + return locks; + } + bool IsTxPrepared() const override { for (const auto& [_, shardInfo] : ShardsInfo) { if (shardInfo.State != EShardState::PREPARED) { @@ -184,6 +222,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { } void StartPrepare() override { + YQL_ENSURE(!CollectOnly); AFL_ENSURE(State == ETransactionState::COLLECTING); AFL_ENSURE(!IsReadOnly()); @@ -205,23 +244,15 @@ class TKqpTransactionManager : public 
IKqpTransactionManager { State = ETransactionState::PREPARING; } - TPrepareInfo GetPrepareTransactionInfo(ui64 shardId) override { + TPrepareInfo GetPrepareTransactionInfo() override { AFL_ENSURE(State == ETransactionState::PREPARING); - auto& shardInfo = ShardsInfo.at(shardId); - AFL_ENSURE(shardInfo.State == EShardState::PROCESSING); - shardInfo.State = EShardState::PREPARING; TPrepareInfo result { .SendingShards = SendingShards, .ReceivingShards = ReceivingShards, .Arbiter = std::nullopt, - .Locks = {}, }; - for (const auto& [_, lockInfo] : shardInfo.Locks) { - result.Locks.push_back(lockInfo.Lock); - } - return result; } @@ -246,6 +277,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { } void StartExecute() override { + YQL_ENSURE(!CollectOnly); AFL_ENSURE(State == ETransactionState::PREPARING || (State == ETransactionState::COLLECTING && IsSingleShard())); @@ -293,6 +325,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { } private: + bool CollectOnly = false; ETransactionState State = ETransactionState::COLLECTING; struct TShardInfo { @@ -331,8 +364,8 @@ class TKqpTransactionManager : public IKqpTransactionManager { } -IKqpTransactionManagerPtr CreateKqpTransactionManager() { - return std::make_shared(); +IKqpTransactionManagerPtr CreateKqpTransactionManager(bool collectOnly) { + return std::make_shared(collectOnly); } } diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index fef63afe667c..2f4d21290cff 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -11,21 +11,6 @@ namespace NKikimr { namespace NKqp { -struct TKqpLock { - using TKey = std::tuple; - TKey GetKey() const { return std::make_tuple(Proto.GetLockId(), Proto.GetDataShard(), Proto.GetSchemeShard(), Proto.GetPathId()); } - - bool Invalidated(const TKqpLock& newLock) const { - YQL_ENSURE(GetKey() == newLock.GetKey()); - return Proto.GetGeneration() != newLock.Proto.GetGeneration() || 
Proto.GetCounter() != newLock.Proto.GetCounter(); - } - - TKqpLock(const NKikimrDataEvents::TLock& proto) - : Proto(proto) {} - - NKikimrDataEvents::TLock Proto; -}; - struct TTableInfo { bool IsOlap = false; THashSet Pathes; @@ -50,15 +35,15 @@ class IKqpTransactionManager { using TActionFlags = ui8; - // TODO: ??? - // virutal std::optional GetLockTxId() const = 0; - virtual void AddShard(ui64 shardId, bool isOlap, const TString& path) = 0; virtual void AddAction(ui64 shardId, ui8 action) = 0; - virtual bool AddLock(ui64 shardId, TKqpLock lock) = 0; + virtual bool AddLock(ui64 shardId, const NKikimrDataEvents::TLock& lock) = 0; virtual TTableInfo GetShardTableInfo(ui64 shardId) const = 0; + virtual TVector GetLocks() const = 0; + virtual TVector GetLocks(ui64 shardId) const = 0; + virtual EShardState GetState(ui64 shardId) const = 0; virtual void SetState(ui64 shardId, EShardState state) = 0; @@ -91,10 +76,9 @@ class IKqpTransactionManager { const THashSet& ReceivingShards; std::optional Arbiter; // TODO: support volatile std::optional ArbiterColumnShard; // TODO: support columnshard&topic - TVector Locks; }; - virtual TPrepareInfo GetPrepareTransactionInfo(ui64 shardId) = 0; + virtual TPrepareInfo GetPrepareTransactionInfo() = 0; struct TPrepareResult { ui64 ShardId; @@ -127,7 +111,7 @@ class IKqpTransactionManager { using IKqpTransactionManagerPtr = std::shared_ptr; -IKqpTransactionManagerPtr CreateKqpTransactionManager(); +IKqpTransactionManagerPtr CreateKqpTransactionManager(bool collectOnly = false); } } diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index fc9ea22fada1..2bd168570c2b 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -38,47 +38,6 @@ namespace { return delay; } - struct TLockInfo { - bool AddAndCheckLock(const NKikimrDataEvents::TLock& lock) { - if (!Lock) { - Lock = lock; - return true; - } else { - return lock.GetLockId() == 
Lock->GetLockId() - && lock.GetDataShard() == Lock->GetDataShard() - && lock.GetSchemeShard() == Lock->GetSchemeShard() - && lock.GetPathId() == Lock->GetPathId() - && lock.GetGeneration() == Lock->GetGeneration() - && lock.GetCounter() == Lock->GetCounter(); - } - } - - const std::optional& GetLock() const { - return Lock; - } - - private: - std::optional Lock; - }; - - class TLocksManager { - public: - bool AddLock(ui64 shardId, const NKikimrDataEvents::TLock& lock) { - return Locks[shardId].AddAndCheckLock(lock); - } - - const std::optional& GetLock(ui64 shardId) { - return Locks[shardId].GetLock(); - } - - const THashMap& GetLocks() const { - return Locks; - } - - private: - THashMap Locks; - }; - NKikimrDataEvents::TEvWrite::TOperation::EOperationType GetOperation(NKikimrKqp::TKqpTableSinkSettings::EType type) { switch (type) { case NKikimrKqp::TKqpTableSinkSettings::MODE_REPLACE: @@ -115,8 +74,8 @@ struct IKqpTableWriterCallbacks { virtual void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) = 0; }; -/*class TKqpReadOnlyCommitActor : public TActorBootstrapped { - using TBase = TActorBootstrapped; +/*class TKqpShardsCommitActor : public TActorBootstrapped { + using TBase = TActorBootstrapped; struct TEvPrivate { enum EEv { @@ -128,14 +87,14 @@ struct IKqpTableWriterCallbacks { }; public: - TKqpReadOnlyCommitActor( + TKqpShardsCommitActor( IKqpTableWriterCallbacks* callbacks, IKqpTransactionManagerPtr txManager) { } void Bootstrap() { LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; - Become(&TKqpReadOnlyCommitActor::StatePreparing); + Become(&TKqpShardsCommitActor::StatePreparing); } static constexpr char ActorName[] = "KQP_TABLE_WRITE_ACTOR"; @@ -155,7 +114,7 @@ struct IKqpTableWriterCallbacks { void PassAway() override {; Send(PipeCacheId, new TEvPipeCache::TEvUnlink(0)); - TActorBootstrapped::PassAway(); + TActorBootstrapped::PassAway(); } void Terminate() { 
@@ -217,9 +176,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { , LockNodeId(lockNodeId) , InconsistentTx(inconsistentTx) , Callbacks(callbacks) - // , TxManager(txManager) + , TxManager(txManager ? txManager : CreateKqpTransactionManager(/* collectOnly= */ true)) { - Y_UNUSED(txManager); try { ShardedWriteController = CreateShardedWriteController( TShardedWriteControllerSettings { @@ -265,12 +223,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { return SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable; } - const TString& GetTablePath() const { - return TablePath; - } - - const THashMap& GetLocks() const { - return LocksManager.GetLocks(); + TVector GetLocks() const { + return TxManager->GetLocks(); } TVector GetShardsIds() const { @@ -302,6 +256,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { YQL_ENSURE(ShardedWriteController); try { ShardedWriteController->Write(token, data); + UpdateShards(); } catch (...) { RuntimeError( CurrentExceptionMessage(), @@ -314,6 +269,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { YQL_ENSURE(ShardedWriteController); try { ShardedWriteController->Close(token); + UpdateShards(); } catch (...) { RuntimeError( CurrentExceptionMessage(), @@ -329,6 +285,17 @@ class TKqpTableWriteActor : public TActorBootstrapped { ShardedWriteController->Close(); } + void UpdateShards() { + // Maybe there are better ways to initialize shards... 
+ for (const auto& shardInfo : ShardedWriteController->GetPendingShards()) { + TxManager->AddShard(shardInfo.ShardId, IsOlap(), TablePath); + TxManager->AddAction(shardInfo.ShardId, IKqpTransactionManager::EAction::WRITE); + if (shardInfo.HasRead) { + TxManager->AddAction(shardInfo.ShardId, IKqpTransactionManager::EAction::READ); + } + } + } + bool IsClosed() const { return Closed; } @@ -671,6 +638,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { const auto result = ShardedWriteController->OnMessageAcknowledged( ev->Get()->Record.GetOrigin(), ev->Cookie); if (result) { + YQL_ENSURE(result->IsShardEmpty); Callbacks->OnPrepared(std::move(preparedInfo), result->DataSize); } } @@ -689,12 +657,15 @@ class TKqpTableWriteActor : public TActorBootstrapped { }()); for (const auto& lock : ev->Get()->Record.GetTxLocks()) { - if (!LocksManager.AddLock(ev->Get()->Record.GetOrigin(), lock)) { + if (!TxManager->AddLock(ev->Get()->Record.GetOrigin(), lock)) { + YQL_ENSURE(TxManager->BrokenLocks()); + NYql::TIssues issues; + issues.AddIssue(*TxManager->GetLockIssue()); RuntimeError( - TStringBuilder() << "Transaction locks invalidated. 
Table `" - << SchemeEntry->TableId.PathId.ToString() << "`.", + TStringBuilder() << "Transaction locks invalidated.", NYql::NDqProto::StatusIds::ABORTED, - NYql::TIssues{}); + issues); + return; } } @@ -703,7 +674,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { if (result && result->IsShardEmpty && (Mode == EMode::COMMIT || Mode == EMode::IMMEDIATE_COMMIT)) { Callbacks->OnCommitted(ev->Get()->Record.GetOrigin(), result->DataSize); } else if (result) { - Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), SchemeEntry->TableId, result->DataSize, result->HasRead); + Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), SchemeEntry->TableId, result->DataSize, false); } } @@ -729,8 +700,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void Flush() { - for (const size_t shardId : ShardedWriteController->GetPendingShards()) { - SendDataToShard(shardId); + for (const auto& shardInfo : ShardedWriteController->GetPendingShards()) { + SendDataToShard(shardInfo.ShardId); } } @@ -763,14 +734,14 @@ class TKqpTableWriteActor : public TActorBootstrapped { : NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); if (isImmediateCommit) { - const auto lock = LocksManager.GetLock(shardId); - if (lock) { - auto* locks = evWrite->Record.MutableLocks(); - locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - locks->AddSendingShards(shardId); - locks->AddReceivingShards(shardId); - if (lock) { - *locks->AddLocks() = *lock; + const auto locks = TxManager->GetLocks(shardId); + if (!locks.empty()) { + auto* protoLocks = evWrite->Record.MutableLocks(); + protoLocks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + protoLocks->AddSendingShards(shardId); + protoLocks->AddReceivingShards(shardId); + for (const auto& lock : locks) { + *protoLocks->AddLocks() = lock; } } } else if (isPrepare) { @@ -809,10 +780,10 @@ class TKqpTableWriteActor : public TActorBootstrapped { }*/ // TODO: multi locks (for tablestore support) - const auto lock = 
LocksManager.GetLock(shardId); - if (lock) { + //const auto lock = LocksManager.GetLock(shardId); + //if (lock) { //*locks->AddLocks() = *lock; - } + //} } else if (!InconsistentTx) { evWrite->SetLockId(LockTxId, LockNodeId); } @@ -874,7 +845,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void Handle(TEvPrivate::TEvTerminate::TPtr&) { - Become(&TKqpTableWriteActor::StateTerminating); PassAway(); } @@ -921,6 +891,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void Terminate() { + Become(&TKqpTableWriteActor::StateTerminating); Send(this->SelfId(), new TEvPrivate::TEvTerminate{}); } @@ -944,8 +915,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { std::optional SchemeRequest; ui64 ResolveAttempts = 0; - TLocksManager LocksManager; - //IKqpTransactionManagerPtr TxManager; + IKqpTransactionManagerPtr TxManager; bool Closed = false; EMode Mode = EMode::WRITE; @@ -1035,10 +1005,8 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu TMaybe ExtraData() override { NKikimrKqp::TEvKqpOutputActorResultInfo resultInfo; - for (const auto& [_, lockInfo] : WriteTableActor->GetLocks()) { - if (const auto lock = lockInfo.GetLock(); lock) { - resultInfo.AddLocks()->CopyFrom(*lock); - } + for (const auto& lock : WriteTableActor->GetLocks()) { + resultInfo.AddLocks()->CopyFrom(lock); } google::protobuf::Any result; result.PackFrom(resultInfo); @@ -1540,20 +1508,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) override { - Y_UNUSED(dataSize); - auto& info = WriteInfos.at(tableId); - const auto& lockInfo = info.WriteTableActor->GetLocks().at(shardId); - - const auto lock = lockInfo.GetLock(); - YQL_ENSURE(lock); - YQL_ENSURE(shardId == lock->GetDataShard()); - TxManager->AddShard(shardId, info.WriteTableActor->IsOlap(), info.WriteTableActor->GetTablePath()); - TxManager->AddAction(shardId, IKqpTransactionManager::EAction::WRITE); - if 
(hasRead) { - TxManager->AddAction(shardId, IKqpTransactionManager::EAction::READ); - } - TxManager->AddLock(lock->GetDataShard(), *lock); - + Y_UNUSED(dataSize, shardId, tableId, hasRead); // TODO: delete unused Process(); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 7892683145de..4bdc0c0145e9 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -19,7 +19,6 @@ namespace { constexpr ui64 DataShardMaxOperationBytes = 8_MB; constexpr ui64 ColumnShardMaxOperationBytes = 64_MB; -constexpr ui64 MaxUnshardedBatchBytes = 0_MB; class IPayloadSerializer : public TThrRefBase { public: @@ -421,7 +420,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } void FlushUnsharded(bool force) { - if ((BatchBuilder.Bytes() > 0 && force) || BatchBuilder.Bytes() > MaxUnshardedBatchBytes) { + if (BatchBuilder.Bytes() > 0 && force) { const auto unshardedBatch = BatchBuilder.FlushBatch(true); YQL_ENSURE(unshardedBatch); ShardAndFlushBatch(unshardedBatch, force); @@ -924,14 +923,12 @@ class TShardsInfo { struct TBatchInfo { ui64 DataSize = 0; - bool HasRead = false; }; std::optional PopBatches(const ui64 cookie) { if (BatchesInFlight != 0 && Cookie == cookie) { TBatchInfo result; for (size_t index = 0; index < BatchesInFlight; ++index) { result.DataSize += Batches.front().GetMemory(); - result.HasRead = Batches.front().HasRead; Batches.pop_front(); } @@ -949,6 +946,7 @@ class TShardsInfo { YQL_ENSURE(!IsClosed()); Batches.emplace_back(std::move(batch)); Memory += Batches.back().GetMemory(); + HasReadInBatch |= Batches.back().HasRead; } ui64 GetCookie() const { @@ -971,9 +969,14 @@ class TShardsInfo { SendAttempts = 0; } + bool HasRead() const { + return HasReadInBatch; + } + private: std::deque Batches; i64& Memory; + bool HasReadInBatch = false; ui64& NextCookie; ui64 Cookie; @@ -994,11 +997,14 @@ class TShardsInfo { return insertIt->second; } - TVector 
GetPendingShards() const { - TVector result; + TVector GetPendingShards() const { + TVector result; for (const auto& [id, shard] : ShardsInfo) { if (!shard.IsEmpty() && shard.GetSendAttempts() == 0) { - result.push_back(id); + result.push_back(IShardedWriteController::TPendingShardInfo{ + .ShardId = id, + .HasRead = shard.HasRead(), + }); } } return result; @@ -1175,7 +1181,7 @@ class TShardedWriteController : public IShardedWriteController { } } - TVector GetPendingShards() const override { + TVector GetPendingShards() const override { return ShardsInfo.GetPendingShards(); } @@ -1241,7 +1247,6 @@ class TShardedWriteController : public IShardedWriteController { return TMessageAcknowledgedResult { .DataSize = result->DataSize, .IsShardEmpty = shardInfo.IsEmpty(), - .HasRead = result->HasRead, }; } return std::nullopt; diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index d71f21b20b7f..8dd583654856 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -35,7 +35,11 @@ class IShardedWriteController : public TThrRefBase { virtual void AddCoveringMessages() = 0; - virtual TVector GetPendingShards() const = 0; + struct TPendingShardInfo { + ui64 ShardId; + bool HasRead; + }; + virtual TVector GetPendingShards() const = 0; virtual ui64 GetShardsCount() const = 0; virtual TVector GetShardsIds() const = 0; @@ -57,7 +61,6 @@ class IShardedWriteController : public TThrRefBase { struct TMessageAcknowledgedResult { ui64 DataSize = 0; bool IsShardEmpty = 0; - bool HasRead = false; }; virtual std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) = 0; From d8de81e467cd68cbf45d48eb6caaed624fb82fdc Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 24 Sep 2024 12:35:48 +0300 Subject: [PATCH 24/69] get-rid-of-old-locks-check --- ydb/core/kqp/common/kqp_tx_manager.cpp | 23 ----------------------- ydb/core/kqp/common/kqp_tx_manager.h | 6 ------ 2 files changed, 29 deletions(-) diff 
--git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 10bd9bec3e04..1af2fa4ccbc0 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -182,29 +182,6 @@ class TKqpTransactionManager : public IKqpTransactionManager { ValidSnapshot = hasSnapshot; } - /*TCheckLocksResult CheckLocks() const override { - TCheckLocksResult result; - result.Ok = true; - if (HasSnapshot() && IsReadOnly()) { - // Snapshot read doesn't care about locks. - return result; - } - - for (const auto& [_, shardInfo] : ShardsInfo) { - for (const auto& [_, lockInfo] : shardInfo.Locks) { - if (lockInfo.LocksAcquireFailure) { - result.Ok = false; - result.LocksAcquireFailure = lockInfo.LocksAcquireFailure; - } - if (lockInfo.Invalidated) { - result.Ok = false; - result.BrokenLocks.push_back(lockInfo.Lock); - } - } - } - return result; - }*/ - bool BrokenLocks() const override { return LocksIssue.has_value() && !(HasSnapshot() && IsReadOnly()); } diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index 2f4d21290cff..6d34b3933385 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -57,12 +57,6 @@ class IKqpTransactionManager { virtual bool HasSnapshot() const = 0; virtual void SetHasSnapshot(bool hasSnapshot) = 0; - /*struct TCheckLocksResult { - bool Ok = false; - std::vector BrokenLocks; - bool LocksAcquireFailure = false; - }; - virtual TCheckLocksResult CheckLocks() const = 0;*/ virtual bool BrokenLocks() const = 0; virtual const std::optional& GetLockIssue() const = 0; From 346ae6701c3cb37b691e4a8decb61487a3d472b8 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 24 Sep 2024 15:07:52 +0300 Subject: [PATCH 25/69] fix --- ydb/core/kqp/common/buffer/events.h | 7 - ydb/core/kqp/common/kqp_tx_manager.cpp | 18 +- .../kqp/executer_actor/kqp_data_executer.cpp | 1 + ydb/core/kqp/runtime/kqp_write_actor.cpp | 236 +++++++++++------- 
.../kqp/session_actor/kqp_session_actor.cpp | 1 - ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 6 +- 6 files changed, 161 insertions(+), 108 deletions(-) diff --git a/ydb/core/kqp/common/buffer/events.h b/ydb/core/kqp/common/buffer/events.h index 86246666e747..3326d4f22d2d 100644 --- a/ydb/core/kqp/common/buffer/events.h +++ b/ydb/core/kqp/common/buffer/events.h @@ -8,13 +8,6 @@ namespace NKikimr { namespace NKqp { -struct TPreparedInfo { - ui64 ShardId; - ui64 MinStep; - ui64 MaxStep; - TVector Coordinators; -}; - struct TEvKqpBuffer { struct TEvPrepare : public TEventLocal { diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 1af2fa4ccbc0..9e2e93709cdf 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -203,13 +203,16 @@ class TKqpTransactionManager : public IKqpTransactionManager { AFL_ENSURE(State == ETransactionState::COLLECTING); AFL_ENSURE(!IsReadOnly()); - for (const auto& [shardId, shardInfo] : ShardsInfo) { + for (auto& [shardId, shardInfo] : ShardsInfo) { if (shardInfo.Flags & EAction::WRITE) { ReceivingShards.insert(shardId); } - if (shardInfo.Flags & EAction::READ) { + if (!shardInfo.Locks.empty()) { SendingShards.insert(shardId); } + + AFL_ENSURE(shardInfo.State == EShardState::PROCESSING); + shardInfo.State = EShardState::PREPARING; } ShardsToWait = ShardsIds; @@ -267,8 +270,6 @@ class TKqpTransactionManager : public IKqpTransactionManager { && IsSingleShard())); shardInfo.State = EShardState::EXECUTING; } - - ShardsToWait = ShardsIds; } TCommitInfo GetCommitInfo() override { @@ -284,8 +285,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { .AffectedFlags = shardInfo.Flags, }); - AFL_ENSURE(shardInfo.State == EShardState::PREPARED || shardInfo.State == EShardState::PROCESSING); - shardInfo.State = EShardState::EXECUTING; + AFL_ENSURE(shardInfo.State == EShardState::EXECUTING); } return result; } @@ -296,9 +296,9 @@ class 
TKqpTransactionManager : public IKqpTransactionManager { AFL_ENSURE(shardInfo.State == EShardState::EXECUTING); shardInfo.State = EShardState::FINISHED; - ShardsToWait.erase(shardId); - - return ShardsToWait.empty(); + // Either all shards committed or all shards failed, + // so we need to wait only for one answer. + return true; } private: diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index a06ed94d77cb..5da0774478d6 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -1909,6 +1909,7 @@ class TKqpDataExecuter : public TKqpExecuterBase(); event->ExecuterActorId = SelfId(); + event->TxId = TxId; Send(BufferActorId, event.release()); Become(&TKqpDataExecuter::FinalizeState); return; diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 2bd168570c2b..00bf5e9c12af 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -67,7 +67,7 @@ struct IKqpTableWriterCallbacks { virtual void OnReady() = 0; // EvWrite statuses - virtual void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) = 0; + virtual void OnPrepared(IKqpTransactionManager::TPrepareResult&& preparedInfo, ui64 dataSize) = 0; virtual void OnCommitted(ui64 shardId, ui64 dataSize) = 0; virtual void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) = 0; @@ -629,12 +629,18 @@ class TKqpTableWriteActor : public TActorBootstrapped { void ProcessWritePreparedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { YQL_ENSURE(Mode == EMode::PREPARE); const auto& record = ev->Get()->Record; - TPreparedInfo preparedInfo; + IKqpTransactionManager::TPrepareResult preparedInfo; preparedInfo.ShardId = record.GetOrigin(); preparedInfo.MinStep = record.GetMinStep(); preparedInfo.MaxStep = record.GetMaxStep(); - preparedInfo.Coordinators = 
TVector(record.GetDomainCoordinators().begin(), - record.GetDomainCoordinators().end()); + + preparedInfo.Coordinator = 0; + if (record.DomainCoordinatorsSize()) { + auto domainCoordinators = TCoordinators(TVector(record.GetDomainCoordinators().begin(), + record.GetDomainCoordinators().end())); + preparedInfo.Coordinator = domainCoordinators.Select(*TxId); + } + const auto result = ShardedWriteController->OnMessageAcknowledged( ev->Get()->Record.GetOrigin(), ev->Cookie); if (result) { @@ -669,27 +675,31 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } + if (Mode == EMode::COMMIT) { + Callbacks->OnCommitted(ev->Get()->Record.GetOrigin(), 0); + return; + } + const auto result = ShardedWriteController->OnMessageAcknowledged( ev->Get()->Record.GetOrigin(), ev->Cookie); - if (result && result->IsShardEmpty && (Mode == EMode::COMMIT || Mode == EMode::IMMEDIATE_COMMIT)) { + if (result && result->IsShardEmpty && Mode == EMode::IMMEDIATE_COMMIT) { Callbacks->OnCommitted(ev->Get()->Record.GetOrigin(), result->DataSize); } else if (result) { Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), SchemeEntry->TableId, result->DataSize, false); } } - void SetPrepare() { + void SetPrepare(ui64 txId) { YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::PREPARE; - //PrepareSettings = prepareSettings; + TxId = txId; ShardedWriteController->AddCoveringMessages(); } - //void SetCommit() { - // //TODO: do we need it? 
- // YQL_ENSURE(Mode == EMode::PREPARE); - // Mode = EMode::COMMIT; - //} + void SetDistributedCommit() { + YQL_ENSURE(Mode == EMode::PREPARE); + Mode = EMode::COMMIT; + } void SetImmediateCommit() { YQL_ENSURE(Mode == EMode::WRITE); @@ -745,45 +755,46 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } } else if (isPrepare) { - /*evWrite->Record.SetTxId(PrepareSettings->TxId); - auto* locks = evWrite->Record.MutableLocks(); - locks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + YQL_ENSURE(TxId); + evWrite->Record.SetTxId(*TxId); + auto* protoLocks = evWrite->Record.MutableLocks(); + protoLocks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - if (!PrepareSettings->ArbiterColumnShard) { - for (const ui64 sendingShardId : PrepareSettings->SendingShards) { - locks->AddSendingShards(sendingShardId); + const auto prepareSettings = TxManager->GetPrepareTransactionInfo(); + if (!prepareSettings.ArbiterColumnShard) { + for (const ui64 sendingShardId : prepareSettings.SendingShards) { + protoLocks->AddSendingShards(sendingShardId); } - for (const ui64 receivingShardId : PrepareSettings->ReceivingShards) { - locks->AddReceivingShards(receivingShardId); + for (const ui64 receivingShardId : prepareSettings.ReceivingShards) { + protoLocks->AddReceivingShards(receivingShardId); } - if (PrepareSettings->ArbiterShard) { - locks->SetArbiterShard(*PrepareSettings->ArbiterShard); + if (prepareSettings.Arbiter) { + protoLocks->SetArbiterShard(*prepareSettings.Arbiter); } - } else if (PrepareSettings->ArbiterColumnShard == shardId) { - locks->SetArbiterColumnShard(*PrepareSettings->ArbiterColumnShard); - for (const ui64 sendingShardId : PrepareSettings->SendingShards) { - locks->AddSendingShards(sendingShardId); + } else if (prepareSettings.ArbiterColumnShard == shardId) { + protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); + for (const ui64 sendingShardId : prepareSettings.SendingShards) { + protoLocks->AddSendingShards(sendingShardId); } - for (const 
ui64 receivingShardId : PrepareSettings->ReceivingShards) { - locks->AddReceivingShards(receivingShardId); + for (const ui64 receivingShardId : prepareSettings.ReceivingShards) { + protoLocks->AddReceivingShards(receivingShardId); } } else { - locks->SetArbiterColumnShard(*PrepareSettings->ArbiterColumnShard); - locks->AddSendingShards(*PrepareSettings->ArbiterColumnShard); - locks->AddReceivingShards(*PrepareSettings->ArbiterColumnShard); - if (PrepareSettings->SendingShards.contains(shardId)) { - locks->AddSendingShards(shardId); + protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); + protoLocks->AddSendingShards(*prepareSettings.ArbiterColumnShard); + protoLocks->AddReceivingShards(*prepareSettings.ArbiterColumnShard); + if (prepareSettings.SendingShards.contains(shardId)) { + protoLocks->AddSendingShards(shardId); } - if (PrepareSettings->ReceivingShards.contains(shardId)) { - locks->AddReceivingShards(shardId); + if (prepareSettings.ReceivingShards.contains(shardId)) { + protoLocks->AddReceivingShards(shardId); } - }*/ + } - // TODO: multi locks (for tablestore support) - //const auto lock = LocksManager.GetLock(shardId); - //if (lock) { - //*locks->AddLocks() = *lock; - //} + const auto locks = TxManager->GetLocks(shardId); + for (const auto& lock : locks) { + *protoLocks->AddLocks() = lock; + } } else if (!InconsistentTx) { evWrite->SetLockId(LockTxId, LockNodeId); } @@ -905,6 +916,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { const TTableId TableId; const TString TablePath; + std::optional TxId; const ui64 LockTxId; const ui64 LockNodeId; const bool InconsistentTx; @@ -918,8 +930,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { IKqpTransactionManagerPtr TxManager; bool Closed = false; EMode Mode = EMode::WRITE; - - //std::shared_ptr PrepareSettings; IShardedWriteControllerPtr ShardedWriteController = nullptr; }; @@ -1074,7 +1084,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu Process(); } - void 
OnPrepared(TPreparedInfo&&, ui64) override { + void OnPrepared(IKqpTransactionManager::TPrepareResult&&, ui64) override { AFL_ENSURE(false); } @@ -1206,6 +1216,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub hFunc(TEvKqpBuffer::TEvTerminate, Handle); hFunc(TEvKqpBuffer::TEvFlush, Handle); hFunc(TEvKqpBuffer::TEvCommit, Handle); + hFunc(TEvKqpBuffer::TEvRollback, Handle); + hFunc(TEvTxProxy::TEvProposeTransactionStatus, Handle); + hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); hFunc(TEvBufferWrite, Handle); default: AFL_ENSURE(false)("unknown message", ev->GetTypeRewrite()); @@ -1334,15 +1347,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } - /*if (State == EState::PREPARING) { - bool isFinished = true; - for (auto& [_, info] : WriteInfos) { - isFinished &= info.WriteTableActor->IsFinished(); - } - if (isFinished) { - OnFinished(); - } - }*/ if (State == EState::FLUSHING) { bool isEmpty = true; for (auto& [_, info] : WriteInfos) { @@ -1354,29 +1358,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } - /*THashMap GetLocks(TWriteToken token) const { - auto& info = WriteInfos.at(token.TableId); - THashMap result; - for (const auto& [shardId, lockInfo] : info.WriteTableActor->GetLocks()) { - if (const auto lock = lockInfo.GetLock(); lock) { - result.emplace(shardId, *lock); - } - } - return result; - } - - THashMap GetLocks() const { - THashMap result; - for (const auto& [_, info] : WriteInfos) { - for (const auto& [shardId, lockInfo] : info.WriteTableActor->GetLocks()) { - if (const auto lock = lockInfo.GetLock(); lock) { - result.emplace(shardId, *lock); - } - } - } - return result; - }*/ - void Flush() { YQL_ENSURE(State == EState::WRITING); State = EState::FLUSHING; @@ -1386,12 +1367,11 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void Prepare(const ui64 txId) { YQL_ENSURE(State == EState::WRITING); State = EState::PREPARING; - Y_UNUSED(txId); + TxId = txId; + for (auto& [_, info] : WriteInfos) { 
+ info.WriteTableActor->SetPrepare(txId); + } // TODO: Additional actor for reads commit - //for (auto& [_, info] : WriteInfos) { - //info.WriteTableActor->SetPrepare(prepareSettings); - //GetShardsIds() - //} Close(); Process(); } @@ -1406,6 +1386,44 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Process(); } + void DistributedCommit() { + YQL_ENSURE(State == EState::PREPARING); + State = EState::COMMITTING; + for (auto& [_, info] : WriteInfos) { + info.WriteTableActor->SetDistributedCommit(); + } + // TODO: Additional actor for reads commit + SendCommitToCoordinator(); + } + + void SendCommitToCoordinator() { + const auto commitInfo = TxManager->GetCommitInfo(); + + auto ev = MakeHolder(); + + YQL_ENSURE(commitInfo.Coordinator); + ev->Record.SetCoordinatorID(commitInfo.Coordinator); + + auto& transaction = *ev->Record.MutableTransaction(); + auto& affectedSet = *transaction.MutableAffectedSet(); + affectedSet.Reserve(commitInfo.ShardsInfo.size()); + + YQL_ENSURE(TxId); + transaction.SetTxId(*TxId); + transaction.SetMinStep(commitInfo.MinStep); + transaction.SetMaxStep(commitInfo.MaxStep); + + for (const auto& shardInfo : commitInfo.ShardsInfo) { + auto& item = *affectedSet.Add(); + item.SetTabletId(shardInfo.ShardId); + item.SetFlags(shardInfo.AffectedFlags); + } + + //TODO: NDataIntegrity & Volatile + CA_LOG_D("Execute planned transaction, coordinator: " << commitInfo.Coordinator); + Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvForward(ev.Release(), commitInfo.Coordinator, /* subscribe */ true)); + } + void Close() { for (auto& [_, info] : WriteInfos) { if (!info.WriteTableActor->IsClosed()) { @@ -1465,6 +1483,42 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub TActorBootstrapped::PassAway(); } + void Handle(TEvTxProxy::TEvProposeTransactionStatus::TPtr &ev) { + // TODO: move it to commit actor??? 
+ TEvTxProxy::TEvProposeTransactionStatus* res = ev->Get(); + CA_LOG_D("Got transaction status, status: " << res->GetStatus()); + + switch (res->GetStatus()) { + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusAccepted: + // TODO: metrics + break; + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusProcessed: + break; + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusConfirmed: + break; + + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusPlanned: + break; + + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusOutdated: + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusDeclined: + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusDeclinedNoSpace: + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusRestarting: + // TODO: CancelProposal + ReplyErrorAndDie(TStringBuilder() << "Failed to plan transaction, status: " << res->GetStatus(), NYql::NDqProto::StatusIds::UNAVAILABLE, {}); + break; + + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusUnknown: + case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusAborted: + ReplyErrorAndDie(TStringBuilder() << "Unexpected TEvProposeTransactionStatus status: " << res->GetStatus(), NYql::NDqProto::StatusIds::INTERNAL_ERROR, {}); + break; + } + } + + void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr&) { + ReplyErrorAndDie(TStringBuilder() << "Failed to deviler message.", NYql::NDqProto::StatusIds::UNAVAILABLE, {}); + } + void Handle(TEvKqpBuffer::TEvTerminate::TPtr&) { PassAway(); } @@ -1486,14 +1540,24 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } + void Handle(TEvKqpBuffer::TEvRollback::TPtr& ev) { + // TODO: rollback using only commit actor + ExecuterActorId = ev->Get()->ExecuterActorId; + Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); + } + void OnReady() override { Process(); } - void OnPrepared(TPreparedInfo&& preparedInfo, ui64 dataSize) override { + void 
OnPrepared(IKqpTransactionManager::TPrepareResult&& preparedInfo, ui64 dataSize) override { AFL_ENSURE(State == EState::PREPARING); Y_UNUSED(preparedInfo, dataSize); - // TODO: collect info for commit + if (TxManager->ConsumePrepareTransactionResult(std::move(preparedInfo))) { + TxManager->StartExecute(); + DistributedCommit(); + return; + } Process(); } @@ -1504,7 +1568,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); ExecuterActorId = {}; } - // Process(); // Don't need it? } void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) override { @@ -1512,10 +1575,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Process(); } - void OnFinished() { - // TODO: send collected data - } - void OnFlushed() { State = EState::WRITING; Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); @@ -1544,6 +1603,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub TActorId ExecuterActorId; IKqpTransactionManagerPtr TxManager; + std::optional TxId; ui64 LockTxId = 0; ui64 LockNodeId = 0; bool InconsistentTx = false; diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 6b25135fbf85..9f297d775d5f 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -2319,7 +2319,6 @@ class TKqpSessionActor : public TActorBootstrapped { FillTxInfo(response); ExecuterId = TActorId{}; - Cerr << "HERE" << Endl; Cleanup(IsFatalError(ydbStatus)); } diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index 187371c352d2..56f611be55cf 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -3713,7 +3713,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { { auto it = client.StreamExecuteQuery(R"( SELECT * FROM `/Root/DataShard`; - )", 
NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson(output, R"([[0u;[0];#];[1u;#;["test"]]])"); @@ -3723,7 +3723,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto it = client.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard` (Col1, Col3) VALUES (0u, 'null'); REPLACE INTO `/Root/DataShard` (Col1) VALUES (1u); - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } @@ -3731,7 +3731,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { { auto it = client.StreamExecuteQuery(R"( SELECT * FROM `/Root/DataShard`; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson(output, R"([[0u;#;["null"]];[1u;#;#]])"); From d290b7aa1ee2c26741cd66a05eb68f014d6c4f41 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 24 Sep 2024 18:33:09 +0300 Subject: [PATCH 26/69] prepare-and-rollback --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 406 +++++++++++++++++------ 1 file changed, 304 insertions(+), 102 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 00bf5e9c12af..f207188895c2 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -54,6 +54,58 @@ namespace { return 
NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UNSPECIFIED; } } + + void FillEvWritePrepare(NKikimr::NEvents::TDataEvents::TEvWrite* evWrite, ui64 shardId, ui64 txId, const NKikimr::NKqp::IKqpTransactionManagerPtr& txManager) { + evWrite->Record.SetTxId(txId); + auto* protoLocks = evWrite->Record.MutableLocks(); + protoLocks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + + const auto prepareSettings = txManager->GetPrepareTransactionInfo(); + if (!prepareSettings.ArbiterColumnShard) { + for (const ui64 sendingShardId : prepareSettings.SendingShards) { + protoLocks->AddSendingShards(sendingShardId); + } + for (const ui64 receivingShardId : prepareSettings.ReceivingShards) { + protoLocks->AddReceivingShards(receivingShardId); + } + if (prepareSettings.Arbiter) { + protoLocks->SetArbiterShard(*prepareSettings.Arbiter); + } + } else if (prepareSettings.ArbiterColumnShard == shardId) { + protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); + for (const ui64 sendingShardId : prepareSettings.SendingShards) { + protoLocks->AddSendingShards(sendingShardId); + } + for (const ui64 receivingShardId : prepareSettings.ReceivingShards) { + protoLocks->AddReceivingShards(receivingShardId); + } + } else { + protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); + protoLocks->AddSendingShards(*prepareSettings.ArbiterColumnShard); + protoLocks->AddReceivingShards(*prepareSettings.ArbiterColumnShard); + if (prepareSettings.SendingShards.contains(shardId)) { + protoLocks->AddSendingShards(shardId); + } + if (prepareSettings.ReceivingShards.contains(shardId)) { + protoLocks->AddReceivingShards(shardId); + } + } + + const auto locks = txManager->GetLocks(shardId); + for (const auto& lock : locks) { + *protoLocks->AddLocks() = lock; + } + } + + void FillEvWriteRollback(NKikimr::NEvents::TDataEvents::TEvWrite* evWrite, ui64 shardId, const NKikimr::NKqp::IKqpTransactionManagerPtr& txManager) { + auto* protoLocks = evWrite->Record.MutableLocks(); + 
protoLocks->SetOp(NKikimrDataEvents::TKqpLocks::Rollback); + + const auto locks = txManager->GetLocks(shardId); + for (const auto& lock : locks) { + *protoLocks->AddLocks() = lock; + } + } } @@ -74,56 +126,6 @@ struct IKqpTableWriterCallbacks { virtual void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) = 0; }; -/*class TKqpShardsCommitActor : public TActorBootstrapped { - using TBase = TActorBootstrapped; - - struct TEvPrivate { - enum EEv { - EvTerminate, - }; - - struct TEvTerminate : public TEventLocal { - }; - }; - -public: - TKqpShardsCommitActor( - IKqpTableWriterCallbacks* callbacks, - IKqpTransactionManagerPtr txManager) { - } - - void Bootstrap() { - LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; - Become(&TKqpShardsCommitActor::StatePreparing); - } - - static constexpr char ActorName[] = "KQP_TABLE_WRITE_ACTOR"; - - STFUNC(StateProcessing) { - try { - switch (ev->GetTypeRewrite()) { - } - } catch (const yexception& e) { - RuntimeError(e.what(), NYql::NDqProto::StatusIds::INTERNAL_ERROR); - } - } - - void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { - Callbacks->OnError(message, statusCode, subIssues); - } - - void PassAway() override {; - Send(PipeCacheId, new TEvPipeCache::TEvUnlink(0)); - TActorBootstrapped::PassAway(); - } - - void Terminate() { - Send(this->SelfId(), new TEvPrivate::TEvTerminate{}); - } - -private: - IKqpTransactionManagerPtr TxManager; -};*/ class TKqpTableWriteActor : public TActorBootstrapped { using TBase = TActorBootstrapped; @@ -312,7 +314,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { hFunc(TEvTxProxySchemeCache::TEvResolveKeySetResult, Handle); hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); hFunc(TEvPrivate::TEvShardRequestTimeout, Handle); - hFunc(TEvPrivate::TEvTerminate, Handle); hFunc(TEvPrivate::TEvResolveRequestPlanned, 
Handle); IgnoreFunc(TEvInterconnect::TEvNodeConnected); IgnoreFunc(TEvTxProxySchemeCache::TEvInvalidateTableResult); @@ -323,7 +324,13 @@ class TKqpTableWriteActor : public TActorBootstrapped { } STFUNC(StateTerminating) { - Y_UNUSED(ev); + try { + switch (ev->GetTypeRewrite()) { + hFunc(TEvPrivate::TEvTerminate, Handle); + } + } catch (const yexception& e) { + CA_LOG_E(e.what()); + } } bool IsResolving() const { @@ -705,8 +712,11 @@ class TKqpTableWriteActor : public TActorBootstrapped { YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::IMMEDIATE_COMMIT; - YQL_ENSURE(ShardedWriteController->GetShardsCount() == 1); - ShardedWriteController->AddCoveringMessages(); + if (ShardedWriteController->GetShardsCount() == 1) { + ShardedWriteController->AddCoveringMessages(); + } else { + YQL_ENSURE(ShardedWriteController->GetShardsCount() == 0); + } } void Flush() { @@ -756,45 +766,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } else if (isPrepare) { YQL_ENSURE(TxId); - evWrite->Record.SetTxId(*TxId); - auto* protoLocks = evWrite->Record.MutableLocks(); - protoLocks->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - - const auto prepareSettings = TxManager->GetPrepareTransactionInfo(); - if (!prepareSettings.ArbiterColumnShard) { - for (const ui64 sendingShardId : prepareSettings.SendingShards) { - protoLocks->AddSendingShards(sendingShardId); - } - for (const ui64 receivingShardId : prepareSettings.ReceivingShards) { - protoLocks->AddReceivingShards(receivingShardId); - } - if (prepareSettings.Arbiter) { - protoLocks->SetArbiterShard(*prepareSettings.Arbiter); - } - } else if (prepareSettings.ArbiterColumnShard == shardId) { - protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); - for (const ui64 sendingShardId : prepareSettings.SendingShards) { - protoLocks->AddSendingShards(sendingShardId); - } - for (const ui64 receivingShardId : prepareSettings.ReceivingShards) { - protoLocks->AddReceivingShards(receivingShardId); - } - } else { - 
protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); - protoLocks->AddSendingShards(*prepareSettings.ArbiterColumnShard); - protoLocks->AddReceivingShards(*prepareSettings.ArbiterColumnShard); - if (prepareSettings.SendingShards.contains(shardId)) { - protoLocks->AddSendingShards(shardId); - } - if (prepareSettings.ReceivingShards.contains(shardId)) { - protoLocks->AddReceivingShards(shardId); - } - } - - const auto locks = TxManager->GetLocks(shardId); - for (const auto& lock : locks) { - *protoLocks->AddLocks() = lock; - } + FillEvWritePrepare(evWrite.get(), shardId, *TxId, TxManager); } else if (!InconsistentTx) { evWrite->SetLockId(LockTxId, LockNodeId); } @@ -1205,21 +1177,24 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void Bootstrap() { LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; - Become(&TKqpBufferWriteActor::StateFunc); + Become(&TKqpBufferWriteActor::StateWrite); } static constexpr char ActorName[] = "KQP_BUFFER_WRITE_ACTOR"; - STFUNC(StateFunc) { + // TODO: split states + STFUNC(StateWrite) { try { switch (ev->GetTypeRewrite()) { hFunc(TEvKqpBuffer::TEvTerminate, Handle); hFunc(TEvKqpBuffer::TEvFlush, Handle); hFunc(TEvKqpBuffer::TEvCommit, Handle); hFunc(TEvKqpBuffer::TEvRollback, Handle); + hFunc(TEvBufferWrite, Handle); + hFunc(TEvTxProxy::TEvProposeTransactionStatus, Handle); + hFunc(NKikimr::NEvents::TDataEvents::TEvWriteResult, Handle); hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); - hFunc(TEvBufferWrite, Handle); default: AFL_ENSURE(false)("unknown message", ev->GetTypeRewrite()); } @@ -1371,9 +1346,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetPrepare(txId); } - // TODO: Additional actor for reads commit Close(); Process(); + SendToExternalShards(); } void ImmediateCommit() { @@ -1392,10 +1367,58 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub for (auto& [_, info] : 
WriteInfos) { info.WriteTableActor->SetDistributedCommit(); } - // TODO: Additional actor for reads commit SendCommitToCoordinator(); } + void Rollback() { + YQL_ENSURE(State == EState::ROLLINGBACK); + SendToExternalShards(); + } + + void SendToExternalShards() { + const bool isRollback = (State == EState::ROLLINGBACK); + + THashSet ExternalShards = TxManager->GetShards(); + for (auto& [_, info] : WriteInfos) { + for (const auto& shardId : info.WriteTableActor->GetShardsIds()) { + ExternalShards.erase(shardId); + } + } + + for (const ui64 shardId : ExternalShards) { + + auto evWrite = std::make_unique(isRollback + ? NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE + : NKikimrDataEvents::TEvWrite::MODE_PREPARE); + + if (isRollback) { + FillEvWriteRollback(evWrite.get(), shardId, TxManager); + } else { + YQL_ENSURE(TxId); + FillEvWritePrepare(evWrite.get(), shardId, *TxId, TxManager); + } + + CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", isPrepare=" << !isRollback << ", isImmediateCommit=" << isRollback << ", TxId=" << evWrite->Record.GetTxId() + << ", LockTxId=" << evWrite->Record.GetLockTxId() << ", LockNodeId=" << evWrite->Record.GetLockNodeId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : evWrite->Record.GetLocks().GetLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", Size=" << 0 << ", Cookie=" << 0 + << ", OperationsCount=" << 0 << ", IsFinal=" << 1 + << ", Attempts=" << 0); + + Send( + NKikimr::MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward(evWrite.release(), shardId, true), + 0, + 0); + } + } + void SendCommitToCoordinator() { const auto commitInfo = TxManager->GetCommitInfo(); @@ -1515,7 +1538,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } - void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr&) { + void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { + CA_LOG_W("TEvDeliveryProblem was received from tablet: " << ev->Get()->TabletId); 
ReplyErrorAndDie(TStringBuilder() << "Failed to deviler message.", NYql::NDqProto::StatusIds::UNAVAILABLE, {}); } @@ -1531,7 +1555,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void Handle(TEvKqpBuffer::TEvCommit::TPtr& ev) { ExecuterActorId = ev->Get()->ExecuterActorId; YQL_ENSURE(!TxManager->IsReadOnly()); - if (TxManager->IsSingleShard()) { + if (TxManager->IsSingleShard() && !WriteInfos.empty()) { TxManager->StartExecute(); ImmediateCommit(); } else { @@ -1541,11 +1565,189 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Handle(TEvKqpBuffer::TEvRollback::TPtr& ev) { - // TODO: rollback using only commit actor ExecuterActorId = ev->Get()->ExecuterActorId; + Rollback(); Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); } + void Handle(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { + auto getIssues = [&ev]() { + NYql::TIssues issues; + NYql::IssuesFromMessage(ev->Get()->Record.GetIssues(), issues); + return issues; + }; + + CA_LOG_D("Recv EvWriteResult from ShardID=" << ev->Get()->Record.GetOrigin() + << ", Status=" << NKikimrDataEvents::TEvWriteResult::EStatus_Name(ev->Get()->GetStatus()) + << ", TxId=" << ev->Get()->Record.GetTxId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", Cookie=" << ev->Cookie); + + // TODO: get rid of copy-paste + switch (ev->Get()->GetStatus()) { + case NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED: { + CA_LOG_E("Got UNSPECIFIED for table." + << " ShardID=" << ev->Get()->Record.GetOrigin() << "," + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); + ReplyErrorAndDie( + TStringBuilder() << "Unspecified error for table. 
" + << getIssues().ToOneLineString(), + NYql::NDqProto::StatusIds::UNSPECIFIED, + getIssues()); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_PREPARED: { + ProcessWritePreparedShard(ev); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED: { + ProcessWriteCompletedShard(ev); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_ABORTED: { + CA_LOG_E("Got ABORTED for table." + << " ShardID=" << ev->Get()->Record.GetOrigin() << "," + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); + ReplyErrorAndDie( + TStringBuilder() << "Aborted for table. " + << getIssues().ToOneLineString(), + NYql::NDqProto::StatusIds::ABORTED, + getIssues()); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR: { + CA_LOG_E("Got INTERNAL ERROR for table." + << " ShardID=" << ev->Get()->Record.GetOrigin() << "," + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); + + ReplyErrorAndDie( + TStringBuilder() << "Internal error for table. " + << getIssues().ToOneLineString(), + NYql::NDqProto::StatusIds::INTERNAL_ERROR, + getIssues()); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_DISK_SPACE_EXHAUSTED: { + CA_LOG_E("Got DISK_SPACE_EXHAUSTED for table." + << " ShardID=" << ev->Get()->Record.GetOrigin() << "," + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); + + ReplyErrorAndDie( + TStringBuilder() << "Disk space exhausted for table. " + << getIssues().ToOneLineString(), + NYql::NDqProto::StatusIds::PRECONDITION_FAILED, + getIssues()); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED: { + CA_LOG_W("Got OVERLOADED for table ." + << " ShardID=" << ev->Get()->Record.GetOrigin() << "," + << " Sink=" << this->SelfId() << "." + << " Ignored this error." + << getIssues().ToOneLineString()); + // TODO: support waiting + ReplyErrorAndDie( + TStringBuilder() << "Tablet " << ev->Get()->Record.GetOrigin() << " is overloaded. 
Table. " + << getIssues().ToOneLineString(), + NYql::NDqProto::StatusIds::OVERLOADED, + getIssues()); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_CANCELLED: { + CA_LOG_E("Got CANCELLED for table." + << " ShardID=" << ev->Get()->Record.GetOrigin() << "," + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); + ReplyErrorAndDie( + TStringBuilder() << "Cancelled request to table." + << getIssues().ToOneLineString(), + NYql::NDqProto::StatusIds::CANCELLED, + getIssues()); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST: { + CA_LOG_E("Got BAD REQUEST for table." + << " ShardID=" << ev->Get()->Record.GetOrigin() << "," + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); + ReplyErrorAndDie( + TStringBuilder() << "Bad request. Table. " + << getIssues().ToOneLineString(), + NYql::NDqProto::StatusIds::BAD_REQUEST, + getIssues()); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_SCHEME_CHANGED: { + CA_LOG_E("Got SCHEME CHANGED for table." + << " ShardID=" << ev->Get()->Record.GetOrigin() << "," + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); + ReplyErrorAndDie( + TStringBuilder() << "Scheme changed. Table. " + << getIssues().ToOneLineString(), + NYql::NDqProto::StatusIds::SCHEME_ERROR, + getIssues()); + return; + } + case NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN: { + CA_LOG_E("Got LOCKS BROKEN for table." + << " ShardID=" << ev->Get()->Record.GetOrigin() << "," + << " Sink=" << this->SelfId() << "." + << getIssues().ToOneLineString()); + ReplyErrorAndDie( + TStringBuilder() << "Transaction locks invalidated.. 
" + << getIssues().ToOneLineString(), + NYql::NDqProto::StatusIds::ABORTED, + getIssues()); + return; + } + } + } + + void ProcessWritePreparedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { + YQL_ENSURE(State == EState::COMMITTING); + const auto& record = ev->Get()->Record; + IKqpTransactionManager::TPrepareResult preparedInfo; + preparedInfo.ShardId = record.GetOrigin(); + preparedInfo.MinStep = record.GetMinStep(); + preparedInfo.MaxStep = record.GetMaxStep(); + + preparedInfo.Coordinator = 0; + if (record.DomainCoordinatorsSize()) { + auto domainCoordinators = TCoordinators(TVector(record.GetDomainCoordinators().begin(), + record.GetDomainCoordinators().end())); + preparedInfo.Coordinator = domainCoordinators.Select(*TxId); + } + + OnPrepared(std::move(preparedInfo), 0); + } + + void ProcessWriteCompletedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { + YQL_ENSURE(State == EState::COMMITTING); + CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() + << ", TabletId=" << ev->Get()->Record.GetOrigin() + << ", Cookie=" << ev->Cookie + << ", Locks=" << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }()); + + OnCommitted(ev->Get()->Record.GetOrigin(), 0); + } + void OnReady() override { Process(); } @@ -1585,7 +1787,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub ReplyErrorAndDie(message, statusCode, subIssues); } - void ReplyErrorAndDie(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) { + void ReplyErrorAndDie(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { CA_LOG_E(message << ". statusCode=" << NYql::NDqProto::StatusIds_StatusCode_Name(statusCode) << ". 
subIssues=" << subIssues.ToString()); Send(SessionActorId, new TEvKqpBuffer::TEvError{ message, From f2252edec6f3646d379260f7e3a284ae5077e50e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 24 Sep 2024 19:34:55 +0300 Subject: [PATCH 27/69] rollback --- ydb/core/kqp/executer_actor/kqp_data_executer.cpp | 7 ++++--- ydb/core/kqp/runtime/kqp_write_actor.cpp | 15 ++++++++++++--- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 8 ++++---- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 5da0774478d6..7523a73af7a4 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -1918,7 +1918,7 @@ class TKqpDataExecuter : public TKqpExecuterBase(); event->ExecuterActorId = SelfId(); Send(BufferActorId, event.release()); - Become(&TKqpDataExecuter::FinalizeState); + MakeResponseAndPassAway(); return; } @@ -2074,7 +2074,9 @@ class TKqpDataExecuter : public TKqpExecuterBase, pub PREPARING, // Do preparation for commit. All writers are closed. New writes wouldn't be accepted. COMMITTING, // Do immediate commit (single shard). All writers are closed. New writes wouldn't be accepted. ROLLINGBACK, // Do rollback. New writes wouldn't be accepted. 
+ FINISHED, }; public: @@ -1371,7 +1372,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Rollback() { - YQL_ENSURE(State == EState::ROLLINGBACK); + State = EState::ROLLINGBACK; SendToExternalShards(); } @@ -1487,6 +1488,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void PassAway() override { + if (State != EState::FINISHED) { + Rollback(); + } for (auto& [_, queue] : DataQueues) { while (!queue.empty()) { auto& message = queue.front(); @@ -1554,8 +1558,11 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void Handle(TEvKqpBuffer::TEvCommit::TPtr& ev) { ExecuterActorId = ev->Get()->ExecuterActorId; - YQL_ENSURE(!TxManager->IsReadOnly()); - if (TxManager->IsSingleShard() && !WriteInfos.empty()) { + if (TxManager->IsReadOnly()) { + Rollback(); + State = EState::FINISHED; + Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); + } else if (TxManager->IsSingleShard() && !WriteInfos.empty()) { TxManager->StartExecute(); ImmediateCommit(); } else { @@ -1567,6 +1574,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void Handle(TEvKqpBuffer::TEvRollback::TPtr& ev) { ExecuterActorId = ev->Get()->ExecuterActorId; Rollback(); + State = EState::FINISHED; Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); } @@ -1767,6 +1775,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub AFL_ENSURE(State == EState::COMMITTING); Y_UNUSED(shardId, dataSize); if (TxManager->ConsumeCommitResult(shardId)) { + State = EState::FINISHED; Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); ExecuterActorId = {}; } diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index 56f611be55cf..a6692bccd21d 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -3852,14 +3852,14 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { INSERT INTO `/Root/DataShard` (Col1, Col2) VALUES (0u, 0); INSERT INTO `/Root/DataShard` (Col1, 
Col3) VALUES (1u, 'test'); INSERT INTO `/Root/DataShard` (Col1, Col3, Col2) VALUES (2u, 't', 3); - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } { auto it = client.StreamExecuteQuery(R"( SELECT * FROM `/Root/DataShard` ORDER BY Col1; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson(output, R"([[0u;[0];#];[1u;#;["test"]];[2u;[3];["t"]]])"); @@ -3868,7 +3868,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { { auto it = client.ExecuteQuery(R"( INSERT INTO `/Root/DataShard` (Col1, Col3) VALUES (0u, 'null'); - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(!it.IsSuccess(), it.GetIssues().ToString()); UNIT_ASSERT_C( it.GetIssues().ToString().Contains("Operation is aborting because an duplicate key") @@ -3879,7 +3879,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { { auto it = client.StreamExecuteQuery(R"( SELECT * FROM `/Root/DataShard` ORDER BY Col1; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson(output, R"([[0u;[0];#];[1u;#;["test"]];[2u;[3];["t"]]])"); From 
4f8bf6aea7e10e3c88daaaf9a1f3fe7c4147f48a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 24 Sep 2024 20:32:32 +0300 Subject: [PATCH 28/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 4 ++++ ydb/core/kqp/session_actor/kqp_query_state.h | 2 +- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index d30426bd3e62..4c1afa92eaf3 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -200,6 +200,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { void Bootstrap() { LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; + CA_LOG_D("New TKqpTableWriteActor for table `" << TablePath << "` (" << TableId << ")."); ResolveTable(); Become(&TKqpTableWriteActor::StateProcessing); } @@ -954,6 +955,8 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu } WriteToken = WriteTableActor->Open(GetOperation(Settings.GetType()), std::move(columnsMetadata)); WaitingForTableActor = true; + + CA_LOG_D("New TKqpDirectWriteActor for table `" << Settings.GetTable().GetPath() << "` (" << TableId << ")."); } static constexpr char ActorName[] = "KQP_DIRECT_WRITE_ACTOR"; @@ -1221,6 +1224,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub auto& writeInfo = WriteInfos[settings.TableId]; if (!writeInfo.WriteTableActor) { + CA_LOG_D("Create new TableWriteActor for table `" << settings.TablePath << "` (" << settings.TableId << "). 
lockId=" << LockTxId); writeInfo.WriteTableActor = new TKqpTableWriteActor( this, settings.TableId, diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index c4fc7e80ae69..14f5aaace6ca 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -418,7 +418,7 @@ class TKqpQueryState : public TNonCopyable { if (TxCtx->CanDeferEffects()) { // At current time sinks require separate tnx with commit. - while (tx && tx->GetHasEffects() && !HasTxSinkInTx(tx)) { + while (tx && tx->GetHasEffects() /*&& !HasTxSinkInTx(tx)*/) { QueryData->CreateKqpValueMap(tx); bool success = TxCtx->AddDeferredEffect(tx, QueryData); YQL_ENSURE(success); diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 9f297d775d5f..373b03d28b08 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1305,10 +1305,10 @@ class TKqpSessionActor : public TActorBootstrapped { request.ResourceManager_ = ResourceManager_; LOG_D("Sending to Executer TraceId: " << request.TraceId.GetTraceId() << " " << request.TraceId.GetSpanIdSize()); - if (Settings.TableService.GetEnableOltpSink() && request.AcquireLocksTxId.Defined()) { + if (Settings.TableService.GetEnableOltpSink()) { txCtx->TxManager = CreateKqpTransactionManager(); } - if (Settings.TableService.GetEnableOltpSink() && !txCtx->BufferActorId && txCtx->HasOltpTable && request.AcquireLocksTxId.Defined()) { + if (Settings.TableService.GetEnableOltpSink() && !txCtx->BufferActorId) { TKqpBufferWriterSettings settings { .SessionActorId = SelfId(), .TxManager = txCtx->TxManager, From 074f9d6617a58f70b0b1c79ced66562b0d7e53a4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 24 Sep 2024 20:34:30 +0300 Subject: [PATCH 29/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 1 - 1 file changed, 1 deletion(-) diff --git 
a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 373b03d28b08..d0ccceef7661 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -2242,7 +2242,6 @@ class TKqpSessionActor : public TActorBootstrapped { } void EndCleanup(bool isFinal) { - Cerr << "EndCleanup, isFinal: " << isFinal << Endl; LOG_D("EndCleanup, isFinal: " << isFinal); if (QueryResponse) From 1cff0e8e241d7e391776fd373271a4214909ca6d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 25 Sep 2024 09:18:34 +0300 Subject: [PATCH 30/69] fix --- ydb/core/kqp/opt/kqp_opt_build_txs.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ydb/core/kqp/opt/kqp_opt_build_txs.cpp b/ydb/core/kqp/opt/kqp_opt_build_txs.cpp index f15d85253640..b57a34791ef1 100644 --- a/ydb/core/kqp/opt/kqp_opt_build_txs.cpp +++ b/ydb/core/kqp/opt/kqp_opt_build_txs.cpp @@ -586,6 +586,8 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { private: TVector CollectEffects(const TExprList& list, TExprContext& ctx) { + return {list}; + struct TEffectsInfo { enum class EType { KQP_EFFECT, From 09548956fc5c6044029a0d4a5b5532996153a8e6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 25 Sep 2024 14:19:03 +0300 Subject: [PATCH 31/69] fix --- .../kqp/executer_actor/kqp_data_executer.cpp | 73 ++++++++++++------- ydb/core/kqp/runtime/kqp_write_actor.cpp | 18 ++++- ydb/core/kqp/session_actor/kqp_query_state.h | 4 +- .../kqp/session_actor/kqp_session_actor.cpp | 68 ++++++++++++----- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 12 +-- 5 files changed, 118 insertions(+), 57 deletions(-) diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 7523a73af7a4..25426e803196 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -214,30 +214,6 @@ class TKqpDataExecuter : public 
TKqpExecuterBase(); - event->ExecuterActorId = SelfId(); - Send(BufferActorId, event.release()); - Become(&TKqpDataExecuter::FinalizeState); - } - } - - STATEFN(FinalizeState) { - switch(ev->GetTypeRewrite()) { - hFunc(TEvKqp::TEvAbortExecution, HandleAbortExecution); - hFunc(TEvKqpBuffer::TEvResult, HandleFinalize); - default: - LOG_W("Unexpected event: " << ev->GetTypeName() << ", at state: FinalizeState"); - } - } - - void HandleFinalize(TEvKqpBuffer::TEvResult::TPtr&) { - MakeResponseAndPassAway(); - } - - void MakeResponseAndPassAway() { YQL_ENSURE(!AlreadyReplied); if (LocksBroken) { YQL_ENSURE(ResponseEv->BrokenLockShardId); @@ -301,6 +277,9 @@ class TKqpDataExecuter : public TKqpExecuterBaseSnapshot = GetSnapshot(); + if (TxManager) { + TxManager->SetHasSnapshot(GetSnapshot().IsValid()); + } if (!Locks.empty() || (TxManager && !TxManager->IsEmpty())) { if (LockHandle) { @@ -309,6 +288,44 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.MutableResponse()->MutableResult()->MutableLocks(), Locks); } + if (!BufferActorId || ReadOnlyTx) { + MakeResponseAndPassAway(); + } else if (Request.LocksOp == ELocksOp::Commit ) { + auto event = std::make_unique(); + event->ExecuterActorId = SelfId(); + event->TxId = TxId; + Become(&TKqpDataExecuter::FinalizeState); + Send(BufferActorId, event.release()); + return; + } else if (Request.LocksOp == ELocksOp::Rollback) { + auto event = std::make_unique(); + event->ExecuterActorId = SelfId(); + Send(BufferActorId, event.release()); + MakeResponseAndPassAway(); + return; + } else { + auto event = std::make_unique(); + event->ExecuterActorId = SelfId(); + Become(&TKqpDataExecuter::FinalizeState); + Send(BufferActorId, event.release()); + } + } + + STATEFN(FinalizeState) { + switch(ev->GetTypeRewrite()) { + hFunc(TEvKqp::TEvAbortExecution, HandleAbortExecution); + hFunc(TEvKqpBuffer::TEvResult, HandleFinalize); + hFunc(TEvents::TEvPoison, HandleShutdown); + default: + LOG_W("Unexpected event: " << ev->GetTypeName() << ", 
at state: FinalizeState"); + } + } + + void HandleFinalize(TEvKqpBuffer::TEvResult::TPtr&) { + MakeResponseAndPassAway(); + } + + void MakeResponseAndPassAway() { auto resultSize = ResponseEv->GetByteSize(); if (resultSize > (int)ReplySizeLimit) { TString message; @@ -1904,7 +1921,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseOrbit, TxId); - if (BufferActorId && Request.LocksOp == ELocksOp::Commit) { + /*if (BufferActorId && Request.LocksOp == ELocksOp::Commit && Request.Transactions.empty()) { // TODO: skip resolving phase? Move it to session actor? YQL_ENSURE(Request.Transactions.empty()); auto event = std::make_unique(); @@ -1914,13 +1931,13 @@ class TKqpDataExecuter : public TKqpExecuterBase(); event->ExecuterActorId = SelfId(); Send(BufferActorId, event.release()); MakeResponseAndPassAway(); return; - } + }*/ size_t sourceScanPartitionsCount = 0; for (ui32 txIdx = 0; txIdx < Request.Transactions.size(); ++txIdx) { @@ -2074,7 +2091,7 @@ class TKqpDataExecuter : public TKqpExecuterBase, pub void ImmediateCommit() { YQL_ENSURE(State == EState::WRITING); State = EState::COMMITTING; + CA_LOG_D("Start immediate commit"); for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetImmediateCommit(); } @@ -1369,6 +1370,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void DistributedCommit() { YQL_ENSURE(State == EState::PREPARING); State = EState::COMMITTING; + CA_LOG_D("Start distributed commit TxId" << *TxId); for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetDistributedCommit(); } @@ -1727,7 +1729,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void ProcessWritePreparedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { - YQL_ENSURE(State == EState::COMMITTING); + if (State != EState::PREPARING) { + return; + } const auto& record = ev->Get()->Record; IKqpTransactionManager::TPrepareResult preparedInfo; preparedInfo.ShardId = record.GetOrigin(); @@ -1745,7 +1749,9 @@ class TKqpBufferWriteActor 
:public TActorBootstrapped, pub } void ProcessWriteCompletedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { - YQL_ENSURE(State == EState::COMMITTING); + if (State != EState::COMMITTING) { + return; + } CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() << ", TabletId=" << ev->Get()->Record.GetOrigin() << ", Cookie=" << ev->Cookie @@ -1765,7 +1771,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void OnPrepared(IKqpTransactionManager::TPrepareResult&& preparedInfo, ui64 dataSize) override { - AFL_ENSURE(State == EState::PREPARING); + if (State != EState::PREPARING) { + return; + } Y_UNUSED(preparedInfo, dataSize); if (TxManager->ConsumePrepareTransactionResult(std::move(preparedInfo))) { TxManager->StartExecute(); @@ -1776,7 +1784,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void OnCommitted(ui64 shardId, ui64 dataSize) override { - AFL_ENSURE(State == EState::COMMITTING); + if (State != EState::COMMITTING) { + return; + } Y_UNUSED(shardId, dataSize); if (TxManager->ConsumeCommitResult(shardId)) { State = EState::FINISHED; diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index 14f5aaace6ca..b11ac32fa751 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -347,10 +347,10 @@ class TKqpQueryState : public TNonCopyable { return true; } - if (HasTxSinkInTx(tx)) { + /*if (HasTxSinkInTx(tx)) { // Sink results can't be committed with changes return false; - } + }*/ if (TxCtx->HasOlapTable) { // HTAP/OLAP transactions always use separate commit. 
diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index d0ccceef7661..fe0873c942b3 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -321,7 +321,7 @@ class TKqpSessionActor : public TActorBootstrapped { } // TODO: support buffer actor - bool replied = ExecutePhyTx(/*tx*/ nullptr, /*commit*/ true); + bool replied = ExecutePhyTx(/*tx*/ nullptr, /*commit*/ true, false); if (!replied) { Become(&TKqpSessionActor::ExecuteState); } @@ -1093,14 +1093,15 @@ class TKqpSessionActor : public TActorBootstrapped { if (QueryState->TxCtx->ShouldExecuteDeferredEffects()) { ExecuteDeferredEffectsImmediately(); } else if (auto commit = QueryState->ShouldCommitWithCurrentTx(tx); commit || tx) { - ExecutePhyTx(tx, commit); + ExecutePhyTx(tx, commit, false); } else { ReplySuccess(); } + //} } void ExecuteDeferredEffectsImmediately() { - YQL_ENSURE(QueryState->TxCtx->ShouldExecuteDeferredEffects()); + //YQL_ENSURE(QueryState->TxCtx->ShouldExecuteDeferredEffects()); auto& txCtx = *QueryState->TxCtx; auto request = PrepareRequest(/* tx */ nullptr, /* literal */ false, QueryState.get()); @@ -1122,7 +1123,7 @@ class TKqpSessionActor : public TActorBootstrapped { SendToExecuter(QueryState->TxCtx.Get(), std::move(request)); } - bool ExecutePhyTx(const TKqpPhyTxHolder::TConstPtr& tx, bool commit) { + bool ExecutePhyTx(const TKqpPhyTxHolder::TConstPtr& tx, bool commit, bool) { if (tx) { switch (tx->GetType()) { case NKqpProto::TKqpPhyTx::TYPE_SCHEME: @@ -1221,27 +1222,60 @@ class TKqpSessionActor : public TActorBootstrapped { request.PerShardKeysSizeLimitBytes = Config->_CommitPerShardKeysSizeLimitBytes.Get().GetRef(); } - const bool hasLocks = txCtx.TxManager ? 
!txCtx.TxManager->IsEmpty() : txCtx.Locks.HasLocks(); - if (hasLocks || txCtx.TopicOperations.HasOperations() || !!txCtx.BufferActorId) { - if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { - LOG_D("TExecPhysicalRequest, tx has commit locks"); + request.AcquireLocksTxId = txCtx.Locks.GetLockTxId(); + + if (Settings.TableService.GetEnableOltpSink()) { + const bool hasLocks = txCtx.TxManager ? !txCtx.TxManager->IsEmpty() : false; + + if (hasLocks) { + if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { + LOG_D("TExecPhysicalRequest, tx has commit locks"); + request.LocksOp = ELocksOp::Commit; + } else { + LOG_D("TExecPhysicalRequest, tx has rollback locks"); + request.LocksOp = ELocksOp::Rollback; + } + } else if (txCtx.TxHasEffects()) { + LOG_D("TExecPhysicalRequest, need commit locks"); request.LocksOp = ELocksOp::Commit; - } else { - LOG_D("TExecPhysicalRequest, tx has rollback locks"); - request.LocksOp = ELocksOp::Rollback; } + } else { + const bool hasLocks = txCtx.TxManager ? 
!txCtx.TxManager->IsEmpty() : txCtx.Locks.HasLocks(); + if (hasLocks || txCtx.TopicOperations.HasOperations() || Settings.TableService.GetEnableOltpSink()) { + if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { + LOG_D("TExecPhysicalRequest, tx has commit locks"); + request.LocksOp = ELocksOp::Commit; + } else { + LOG_D("TExecPhysicalRequest, tx has rollback locks"); + request.LocksOp = ELocksOp::Rollback; + } - if (!txCtx.TxManager) { - for (auto& [lockId, lock] : txCtx.Locks.LocksMap) { - auto dsLock = ExtractLock(lock.GetValueRef(txCtx.Locks.LockType)); - request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); + if (!Settings.TableService.GetEnableOltpSink()) { + for (auto& [lockId, lock] : txCtx.Locks.LocksMap) { + auto dsLock = ExtractLock(lock.GetValueRef(txCtx.Locks.LockType)); + request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); + } + } else { + // TODO: support for non buffer actor writes } - } else { - // TODO: support for non buffer actor writes } } request.TopicOperations = std::move(txCtx.TopicOperations); + /*} else if (executeDeferred && !txCtx.DeferredEffects.Empty()) { + for (const auto& effect : txCtx.DeferredEffects) { + request.Transactions.emplace_back(effect.PhysicalTx, effect.Params); + + LOG_D("TExecPhysicalRequest, add DeferredEffect to Transaction," + << " current Transactions.size(): " << request.Transactions.size()); + } + + request.PerShardKeysSizeLimitBytes = Config->_CommitPerShardKeysSizeLimitBytes.Get().GetRef(); + request.AcquireLocksTxId = txCtx.Locks.GetLockTxId(); + request.UseImmediateEffects = true; + + txCtx.HasImmediateEffects = true; + txCtx.ClearDeferredEffects();*/ } else if (QueryState->ShouldAcquireLocks(tx) && (!txCtx.HasOlapTable || Settings.TableService.GetEnableOlapSink())) { request.AcquireLocksTxId = txCtx.Locks.GetLockTxId(); diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp 
index a6692bccd21d..d83e95cc4d4b 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -4198,14 +4198,14 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard` (Col1, Col2, Col3) VALUES (10u, "test1", 10), (20u, "test2", 11), (2147483647u, "test3", 12), (2147483640u, NULL, 13); - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); } { auto it = client.StreamExecuteQuery(R"( SELECT COUNT(*) FROM `/Root/DataShard`; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson( @@ -4216,14 +4216,14 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard2` SELECT * FROM `/Root/DataShard`; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); } { auto it = client.StreamExecuteQuery(R"( SELECT COUNT(*) FROM `/Root/DataShard2`; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, 
it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson( @@ -4236,14 +4236,14 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { REPLACE INTO `/Root/DataShard2` (Col1, Col2, Col3) VALUES (11u, "test1", 10), (21u, "test2", 11), (2147483646u, "test3", 12), (2147483641u, NULL, 13); SELECT COUNT(*) FROM `/Root/DataShard`; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); } { auto it = client.StreamExecuteQuery(R"( SELECT COUNT(*) FROM `/Root/DataShard2`; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson( From 736abbd2d15ef208a74728ae6dd0d86a1ad2a787 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 26 Sep 2024 02:23:38 +0300 Subject: [PATCH 32/69] fix --- ydb/core/kqp/common/kqp_tx_manager.cpp | 6 +- .../kqp/executer_actor/kqp_data_executer.cpp | 48 ++++---- .../kqp/executer_actor/kqp_executer_impl.h | 6 + ydb/core/kqp/runtime/kqp_write_actor.cpp | 103 +++++++++++++----- ydb/core/kqp/runtime/kqp_write_table.cpp | 3 +- .../datashard/datashard_write_operation.cpp | 2 +- 6 files changed, 110 insertions(+), 58 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 9e2e93709cdf..374f13810d43 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -33,7 +33,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { : CollectOnly(collectOnly) {} void AddShard(ui64 shardId, bool isOlap, const TString& 
path) override { - AFL_ENSURE(State == ETransactionState::COLLECTING); + Y_ABORT_UNLESS(State == ETransactionState::COLLECTING); ShardsIds.insert(shardId); auto& shardInfo = ShardsInfo[shardId]; shardInfo.IsOlap = isOlap; @@ -44,7 +44,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { } void AddAction(ui64 shardId, ui8 action) override { - AFL_ENSURE(State == ETransactionState::COLLECTING); + Y_ABORT_UNLESS(State == ETransactionState::COLLECTING); ShardsInfo.at(shardId).Flags |= action; if (action & EAction::WRITE) { ReadOnly = false; @@ -53,7 +53,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { bool AddLock(ui64 shardId, const NKikimrDataEvents::TLock& lockProto) override { TKqpLock lock(lockProto); - AFL_ENSURE(State == ETransactionState::COLLECTING); + Y_ABORT_UNLESS(State == ETransactionState::COLLECTING); bool isError = (lock.Proto.GetCounter() >= NKikimr::TSysTables::TLocksTable::TLock::ErrorMin); bool isInvalidated = (lock.Proto.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorAlreadyBroken) || (lock.Proto.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorBroken); diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 25426e803196..10ba23299297 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -157,7 +157,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseBrokenLockShardId); return ReplyErrorAndDie(Ydb::StatusIds::ABORTED, {}); @@ -289,17 +290,23 @@ class TKqpDataExecuter : public TKqpExecuterBase(); event->ExecuterActorId = SelfId(); event->TxId = TxId; + LOG_D("SEND BUFFER COMMIT " << BufferActorId); Become(&TKqpDataExecuter::FinalizeState); Send(BufferActorId, event.release()); return; } else if (Request.LocksOp == ELocksOp::Rollback) { auto event = std::make_unique(); event->ExecuterActorId = SelfId(); + Become(&TKqpDataExecuter::FinalizeState); + 
LOG_D("SEND BUFFER ROLLBACK " << BufferActorId); Send(BufferActorId, event.release()); MakeResponseAndPassAway(); return; @@ -307,7 +314,9 @@ class TKqpDataExecuter : public TKqpExecuterBase(); event->ExecuterActorId = SelfId(); Become(&TKqpDataExecuter::FinalizeState); + LOG_D("SEND BUFFER FLUSH " << BufferActorId); Send(BufferActorId, event.release()); + return; } } @@ -315,7 +324,6 @@ class TKqpDataExecuter : public TKqpExecuterBaseGetTypeRewrite()) { hFunc(TEvKqp::TEvAbortExecution, HandleAbortExecution); hFunc(TEvKqpBuffer::TEvResult, HandleFinalize); - hFunc(TEvents::TEvPoison, HandleShutdown); default: LOG_W("Unexpected event: " << ev->GetTypeName() << ", at state: FinalizeState"); } @@ -577,6 +585,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.GetTxLocks(0).GetPathId()); } ReplyErrorAndDie(Ydb::StatusIds::ABORTED, {}); + return; } default: { @@ -1249,6 +1258,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.GetTxLocks(0).GetSchemeShard(), res->Record.GetTxLocks(0).GetPathId()); ReplyErrorAndDie(Ydb::StatusIds::ABORTED, {}); + return; } CheckExecutionComplete(); return; @@ -1921,24 +1931,6 @@ class TKqpDataExecuter : public TKqpExecuterBaseOrbit, TxId); - /*if (BufferActorId && Request.LocksOp == ELocksOp::Commit && Request.Transactions.empty()) { - // TODO: skip resolving phase? Move it to session actor? 
- YQL_ENSURE(Request.Transactions.empty()); - auto event = std::make_unique(); - event->ExecuterActorId = SelfId(); - event->TxId = TxId; - Send(BufferActorId, event.release()); - Become(&TKqpDataExecuter::FinalizeState); - return; - } else if (BufferActorId && Request.LocksOp == ELocksOp::Rollback) { - //YQL_ENSURE(Request.Transactions.empty()); - auto event = std::make_unique(); - event->ExecuterActorId = SelfId(); - Send(BufferActorId, event.release()); - MakeResponseAndPassAway(); - return; - }*/ - size_t sourceScanPartitionsCount = 0; for (ui32 txIdx = 0; txIdx < Request.Transactions.size(); ++txIdx) { auto& tx = Request.Transactions[txIdx]; @@ -2283,14 +2275,20 @@ class TKqpDataExecuter : public TKqpExecuterBaseSender); if (ev->Sender == SelfId()) { PassAway(); diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h b/ydb/core/kqp/executer_actor/kqp_executer_impl.h index fd368f6cb63d..bd3caadfa7b2 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h @@ -510,6 +510,12 @@ class TKqpExecuterBase : public TActorBootstrapped { } } + if (BufferActorId && Request.LocksOp == ELocksOp::Rollback) { + //YQL_ENSURE(Request.Transactions.empty()); + static_cast(this)->Finalize(); + return; + } + ExecuterStateSpan = NWilson::TSpan(TWilsonKqp::ExecuterTableResolve, ExecuterSpan.GetTraceId(), "WaitForTableResolve", NWilson::EFlags::AUTO_END); auto kqpTableResolver = CreateKqpTableResolver(this->SelfId(), TxId, UserToken, Request.Transactions, diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 64b85b872527..9559b51f29a4 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -250,6 +250,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { TableId, operationType, std::move(columnsMetadata)); + CA_LOG_D("Open write to " << TableId << " token=" << token); return token; } @@ -257,6 +258,7 @@ class 
TKqpTableWriteActor : public TActorBootstrapped { YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); YQL_ENSURE(!Closed); YQL_ENSURE(ShardedWriteController); + CA_LOG_D("Write to " << TableId << " token=" << token); try { ShardedWriteController->Write(token, data); UpdateShards(); @@ -270,6 +272,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { void Close(TWriteToken token) { YQL_ENSURE(!Closed); YQL_ENSURE(ShardedWriteController); + CA_LOG_D("Close write to " << TableId << " token=" << token); try { ShardedWriteController->Close(token); UpdateShards(); @@ -662,6 +665,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() << ", TabletId=" << ev->Get()->Record.GetOrigin() << ", Cookie=" << ev->Cookie + << " MODETEST= " << static_cast(Mode) << ", Locks=" << [&]() { TStringBuilder builder; for (const auto& lock : ev->Get()->Record.GetTxLocks()) { @@ -671,6 +675,20 @@ class TKqpTableWriteActor : public TActorBootstrapped { }()); for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + if (Mode != EMode::WRITE) { + CA_LOG_D("ERROR HERE TEST MODE" << static_cast(Mode) << " Got completed result TxId=" << ev->Get()->Record.GetTxId() + << ", TabletId=" << ev->Get()->Record.GetOrigin() + << ", Cookie=" << ev->Cookie + << ", Locks=" << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }()); + Y_ABORT_UNLESS(false); + } + Y_ABORT_UNLESS(Mode == EMode::WRITE); if (!TxManager->AddLock(ev->Get()->Record.GetOrigin(), lock)) { YQL_ENSURE(TxManager->BrokenLocks()); NYql::TIssues issues; @@ -698,6 +716,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void SetPrepare(ui64 txId) { + CA_LOG_D("SetPrepare; txId=" << txId); YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::PREPARE; TxId = txId; @@ -705,11 +724,13 @@ class TKqpTableWriteActor : public TActorBootstrapped { } 
void SetDistributedCommit() { + CA_LOG_D("SetDistributedCommit; txId=" << *TxId); YQL_ENSURE(Mode == EMode::PREPARE); Mode = EMode::COMMIT; } void SetImmediateCommit() { + CA_LOG_D("SetImmediateCommit"); YQL_ENSURE(Mode == EMode::WRITE); Mode = EMode::IMMEDIATE_COMMIT; @@ -748,6 +769,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { const bool isPrepare = metadata->IsFinal && Mode == EMode::PREPARE; const bool isImmediateCommit = metadata->IsFinal && Mode == EMode::IMMEDIATE_COMMIT; + Y_ABORT_UNLESS(!metadata->IsFinal || isPrepare || isImmediateCommit); + auto evWrite = std::make_unique(); evWrite->Record.SetTxMode(isPrepare @@ -786,7 +809,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { }() << ", Size=" << serializationResult.TotalDataSize << ", Cookie=" << metadata->Cookie << ", OperationsCount=" << metadata->OperationsCount << ", IsFinal=" << metadata->IsFinal - << ", Attempts=" << metadata->SendAttempts); + << ", Attempts=" << metadata->SendAttempts << ", Mode=" << static_cast(Mode)); Send( PipeCacheId, new TEvPipeCache::TEvForward(evWrite.release(), shardId, true), @@ -813,6 +836,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { CA_LOG_D("Retry failed: not found ShardID=" << shardId << " with Cookie=" << ifCookieEqual.value_or(0)); return; } + Y_ABORT_UNLESS(false); CA_LOG_D("Retry ShardID=" << shardId << " with Cookie=" << ifCookieEqual.value_or(0)); SendDataToShard(shardId); @@ -933,6 +957,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu } void Bootstrap() { + Y_ABORT_UNLESS(false); LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; WriteTableActor = new TKqpTableWriteActor( @@ -1261,6 +1286,18 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub ProcessRequestQueue(); ProcessWrite(); ProcessAckQueue(); + + if (State == EState::FLUSHING) { + //Y_ABORT_UNLESS(DataQueues.empty()); + //Y_ABORT_UNLESS(AckQueue.empty()); + bool isEmpty = true; + for (auto& [_, info] : 
WriteInfos) { + isEmpty = isEmpty && info.WriteTableActor->IsReady() && info.WriteTableActor->IsEmpty(); + } + if (isEmpty) { + OnFlushed(); + } + } } void ProcessRequestQueue() { @@ -1300,20 +1337,21 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void ProcessAckQueue() { while (!AckQueue.empty()) { - const auto& item = AckQueue.front(); - if (GetTotalFreeSpace() >= item.DataSize) { + //const auto& item = AckQueue.front(); + //if (GetTotalFreeSpace() >= item.DataSize) { auto result = std::make_unique(); result->Token = AckQueue.front().Token; Send(AckQueue.front().ForwardActorId, result.release()); AckQueue.pop(); - } else { - return; - } + //} else { + // return; + //} } } void ProcessWrite() { - const bool needToFlush = GetTotalFreeSpace() <= 0 + //Y_ABORT_UNLESS(GetTotalFreeSpace() <= 0); + const bool needToFlush = /*GetTotalFreeSpace() <= 0*/ false || State == EState::FLUSHING || State == EState::PREPARING || State == EState::COMMITTING @@ -1326,39 +1364,38 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } } - - if (State == EState::FLUSHING) { - bool isEmpty = true; - for (auto& [_, info] : WriteInfos) { - isEmpty &= info.WriteTableActor->IsEmpty(); - } - if (isEmpty) { - OnFlushed(); - } - } } void Flush() { YQL_ENSURE(State == EState::WRITING); State = EState::FLUSHING; + for (auto& [_, queue] : DataQueues) { + Y_ABORT_UNLESS(queue.empty()); + } Process(); } void Prepare(const ui64 txId) { YQL_ENSURE(State == EState::WRITING); State = EState::PREPARING; + for (auto& [_, queue] : DataQueues) { + Y_ABORT_UNLESS(queue.empty()); + } TxId = txId; for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetPrepare(txId); } Close(); Process(); - SendToExternalShards(); + SendToExternalShards(false); } void ImmediateCommit() { YQL_ENSURE(State == EState::WRITING); State = EState::COMMITTING; + for (auto& [_, queue] : DataQueues) { + Y_ABORT_UNLESS(queue.empty()); + } CA_LOG_D("Start immediate commit"); for (auto& [_, info] : 
WriteInfos) { info.WriteTableActor->SetImmediateCommit(); @@ -1370,6 +1407,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void DistributedCommit() { YQL_ENSURE(State == EState::PREPARING); State = EState::COMMITTING; + for (auto& [_, queue] : DataQueues) { + Y_ABORT_UNLESS(queue.empty()); + } CA_LOG_D("Start distributed commit TxId" << *TxId); for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetDistributedCommit(); @@ -1379,21 +1419,23 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void Rollback() { State = EState::ROLLINGBACK; - SendToExternalShards(); + SendToExternalShards(true); } - void SendToExternalShards() { - const bool isRollback = (State == EState::ROLLINGBACK); - - THashSet ExternalShards = TxManager->GetShards(); - for (auto& [_, info] : WriteInfos) { - for (const auto& shardId : info.WriteTableActor->GetShardsIds()) { - ExternalShards.erase(shardId); + void SendToExternalShards(bool isRollback) { + THashSet shards = TxManager->GetShards(); + if (!isRollback) { + for (auto& [_, info] : WriteInfos) { + for (const auto& shardId : info.WriteTableActor->GetShardsIds()) { + shards.erase(shardId); + } } } - for (const ui64 shardId : ExternalShards) { - + for (const ui64 shardId : shards) { + if (TxManager->GetLocks(shardId).empty()) { + continue; + } auto evWrite = std::make_unique(isRollback ? 
NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE : NKikimrDataEvents::TEvWrite::MODE_PREPARE); @@ -1777,6 +1819,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Y_UNUSED(preparedInfo, dataSize); if (TxManager->ConsumePrepareTransactionResult(std::move(preparedInfo))) { TxManager->StartExecute(); + Y_ABORT_UNLESS(GetTotalMemory() == 0); DistributedCommit(); return; } @@ -1792,7 +1835,10 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub State = EState::FINISHED; Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); ExecuterActorId = {}; + Y_ABORT_UNLESS(GetTotalMemory() == 0); + return; } + //Process(); } void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) override { @@ -1804,6 +1850,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub State = EState::WRITING; Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); ExecuterActorId = {}; + Y_ABORT_UNLESS(GetTotalMemory() == 0); } void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) override { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 4bdc0c0145e9..c01458c1187e 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -914,7 +914,8 @@ class TShardsInfo { dataSize += GetBatch(BatchesInFlight).GetMemory(); ++BatchesInFlight; } - YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).GetMemory() <= maxDataSize); + YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).GetMemory() <= maxDataSize); + Y_ABORT_UNLESS(BatchesInFlight == Batches.size()); } const TBatchWithMetadata& GetBatch(size_t index) const { diff --git a/ydb/core/tx/datashard/datashard_write_operation.cpp b/ydb/core/tx/datashard/datashard_write_operation.cpp index 0d67c7f02350..700cd0b1858e 100644 --- a/ydb/core/tx/datashard/datashard_write_operation.cpp +++ 
b/ydb/core/tx/datashard/datashard_write_operation.cpp @@ -382,7 +382,7 @@ TString TWriteOperation::GetTxBody() const { } void TWriteOperation::SetTxBody(const TString& txBody) { - Y_ABORT_UNLESS(!WriteRequest); + //Y_ABORT_UNLESS(WriteRequest); NKikimrTxDataShard::TSerializedEvent proto; const bool success = proto.ParseFromString(txBody); From f2034660a3142f95dfd1bb6b92a8ccefddc83d6f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 26 Sep 2024 18:51:53 +0300 Subject: [PATCH 33/69] volatile --- ydb/core/kqp/common/kqp_tx_manager.cpp | 45 +++++++-- ydb/core/kqp/common/kqp_tx_manager.h | 2 + .../kqp/executer_actor/kqp_data_executer.cpp | 2 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 34 ++++++- ydb/core/kqp/runtime/kqp_write_table.cpp | 7 +- .../kqp/session_actor/kqp_session_actor.cpp | 13 ++- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 97 +++++++++++++++++++ .../datashard/datashard_write_operation.cpp | 7 +- 8 files changed, 186 insertions(+), 21 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 374f13810d43..9df30136a432 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -84,11 +84,10 @@ class TKqpTransactionManager : public IKqpTransactionManager { } if (broken && !LocksIssue) { - const auto& lockInfo = shardInfo.Locks.at(lock.GetKey()); - if (lockInfo.LocksAcquireFailure) { + if (isLocksAcquireFailure) { LocksIssue = YqlIssue(NYql::TPosition(), NYql::TIssuesIds::KIKIMR_LOCKS_ACQUIRE_FAILURE); return false; - } else if (lockInfo.Invalidated) { + } else if (isInvalidated) { TStringBuilder message; message << "Transaction locks invalidated. 
Tables: "; bool first = true; @@ -174,6 +173,10 @@ class TKqpTransactionManager : public IKqpTransactionManager { return GetShardsCount() == 0; } + bool IsVolatile() const override { + return true; + } + bool HasSnapshot() const override { return ValidSnapshot; } @@ -204,8 +207,11 @@ class TKqpTransactionManager : public IKqpTransactionManager { AFL_ENSURE(!IsReadOnly()); for (auto& [shardId, shardInfo] : ShardsInfo) { - if (shardInfo.Flags & EAction::WRITE) { + if ((shardInfo.Flags & EAction::WRITE)) { ReceivingShards.insert(shardId); + if (IsVolatile()) { + SendingShards.insert(shardId); + } } if (!shardInfo.Locks.empty()) { SendingShards.insert(shardId); @@ -215,6 +221,28 @@ class TKqpTransactionManager : public IKqpTransactionManager { shardInfo.State = EShardState::PREPARING; } + Y_ABORT_UNLESS(!ReceivingShards.empty()); + + //ui64 arbiter = 0; + const size_t minArbiterMeshSize = 5; // TODO: make configurable? + if ((IsVolatile() && + ReceivingShards.size() >= minArbiterMeshSize)) + { + std::vector candidates; + candidates.reserve(ReceivingShards.size()); + for (ui64 candidate : ReceivingShards) { + // Note: all receivers are also senders in volatile transactions + if (Y_LIKELY(SendingShards.contains(candidate))) { + candidates.push_back(candidate); + } + } + if (candidates.size() >= minArbiterMeshSize) { + // Select a random arbiter + const ui32 index = RandomNumber(candidates.size()); + Arbiter = candidates.at(index); + } + } + ShardsToWait = ShardsIds; MinStep = std::numeric_limits::min(); @@ -230,7 +258,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { TPrepareInfo result { .SendingShards = SendingShards, .ReceivingShards = ReceivingShards, - .Arbiter = std::nullopt, + .Arbiter = Arbiter, }; return result; @@ -270,6 +298,8 @@ class TKqpTransactionManager : public IKqpTransactionManager { && IsSingleShard())); shardInfo.State = EShardState::EXECUTING; } + + ShardsToWait = ReceivingShards; } TCommitInfo GetCommitInfo() override { @@ -296,9 
+326,11 @@ class TKqpTransactionManager : public IKqpTransactionManager { AFL_ENSURE(shardInfo.State == EShardState::EXECUTING); shardInfo.State = EShardState::FINISHED; + Y_ABORT_UNLESS(ShardsToWait.empty() || !IsSingleShard()); + // Either all shards committed or all shards failed, // so we need to wait only for one answer. - return true; + return ShardsToWait.contains(shardId) || ShardsToWait.empty(); } private: @@ -331,6 +363,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { THashSet SendingShards; THashSet ReceivingShards; + std::optional Arbiter; THashSet ShardsToWait; diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index 6d34b3933385..60a16a4c824d 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -54,6 +54,8 @@ class IKqpTransactionManager { virtual bool IsSingleShard() const = 0; virtual bool IsEmpty() const = 0; + virtual bool IsVolatile() const = 0; + virtual bool HasSnapshot() const = 0; virtual void SetHasSnapshot(bool hasSnapshot) = 0; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 10ba23299297..0641c0d21907 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -289,7 +289,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.MutableResponse()->MutableResult()->MutableLocks(), Locks); } - if (!BufferActorId || ReadOnlyTx) { + if (!BufferActorId || (ReadOnlyTx && Request.LocksOp != ELocksOp::Rollback)) { Become(&TKqpDataExecuter::FinalizeState); LOG_D("DON'T SEND ANYTHING " << BufferActorId); MakeResponseAndPassAway(); diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 9559b51f29a4..571aded4971f 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -68,10 +68,12 @@ namespace { for (const ui64 
receivingShardId : prepareSettings.ReceivingShards) { protoLocks->AddReceivingShards(receivingShardId); } + //Y_ABORT_UNLESS(prepareSettings.Arbiter); if (prepareSettings.Arbiter) { protoLocks->SetArbiterShard(*prepareSettings.Arbiter); } } else if (prepareSettings.ArbiterColumnShard == shardId) { + Y_ABORT_UNLESS(false); protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); for (const ui64 sendingShardId : prepareSettings.SendingShards) { protoLocks->AddSendingShards(sendingShardId); @@ -80,6 +82,7 @@ namespace { protoLocks->AddReceivingShards(receivingShardId); } } else { + Y_ABORT_UNLESS(false); protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); protoLocks->AddSendingShards(*prepareSettings.ArbiterColumnShard); protoLocks->AddReceivingShards(*prepareSettings.ArbiterColumnShard); @@ -774,7 +777,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { auto evWrite = std::make_unique(); evWrite->Record.SetTxMode(isPrepare - ? NKikimrDataEvents::TEvWrite::MODE_PREPARE + ? NKikimrDataEvents::TEvWrite::MODE_VOLATILE_PREPARE //NKikimrDataEvents::TEvWrite::MODE_PREPARE : NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); if (isImmediateCommit) { @@ -813,7 +816,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { Send( PipeCacheId, new TEvPipeCache::TEvForward(evWrite.release(), shardId, true), - 0, + IEventHandle::FlagTrackDelivery, metadata->Cookie); ShardedWriteController->OnMessageSent(shardId, metadata->Cookie); @@ -1249,7 +1252,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub auto& writeInfo = WriteInfos[settings.TableId]; if (!writeInfo.WriteTableActor) { - CA_LOG_D("Create new TableWriteActor for table `" << settings.TablePath << "` (" << settings.TableId << "). 
lockId=" << LockTxId); writeInfo.WriteTableActor = new TKqpTableWriteActor( this, settings.TableId, @@ -1261,6 +1263,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Alloc, TxManager); writeInfo.WriteTableActorId = RegisterWithSameMailbox(writeInfo.WriteTableActor); + CA_LOG_D("Create new TableWriteActor for table `" << settings.TablePath << "` (" << settings.TableId << "). lockId=" << LockTxId << " " << writeInfo.WriteTableActorId); } auto cookie = writeInfo.WriteTableActor->Open(settings.OperationType, std::move(settings.Columns)); @@ -1305,6 +1308,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub auto& writeInfo = WriteInfos.at(tableId); if (!writeInfo.WriteTableActor->IsReady()) { + CA_LOG_D("ProcessRequestQueue " << tableId << " NOT READY queue=" << queue.size()); return; } @@ -1341,6 +1345,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub //if (GetTotalFreeSpace() >= item.DataSize) { auto result = std::make_unique(); result->Token = AckQueue.front().Token; + CA_LOG_D("ProcessAckQueue ACK" << AckQueue.front().ForwardActorId); Send(AckQueue.front().ForwardActorId, result.release()); AckQueue.pop(); //} else { @@ -1359,7 +1364,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub if (needToFlush) { for (auto& [_, info] : WriteInfos) { + CA_LOG_D("FLUSH TEST"); if (info.WriteTableActor->IsReady()) { + CA_LOG_D("FLUSH READY"); info.WriteTableActor->Flush(); } } @@ -1367,6 +1374,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Flush() { + CA_LOG_D("Start FLUSHING"); YQL_ENSURE(State == EState::WRITING); State = EState::FLUSHING; for (auto& [_, queue] : DataQueues) { @@ -1376,6 +1384,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Prepare(const ui64 txId) { + CA_LOG_D("Start PREPARE"); YQL_ENSURE(State == EState::WRITING); State = EState::PREPARING; for (auto& [_, queue] : DataQueues) { @@ -1391,6 +1400,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } 
void ImmediateCommit() { + CA_LOG_D("Start COMMIT I"); YQL_ENSURE(State == EState::WRITING); State = EState::COMMITTING; for (auto& [_, queue] : DataQueues) { @@ -1405,6 +1415,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void DistributedCommit() { + CA_LOG_D("Start COMMIT D"); YQL_ENSURE(State == EState::PREPARING); State = EState::COMMITTING; for (auto& [_, queue] : DataQueues) { @@ -1418,6 +1429,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Rollback() { + CA_LOG_D("Start ROLLBACK"); State = EState::ROLLINGBACK; SendToExternalShards(true); } @@ -1438,7 +1450,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } auto evWrite = std::make_unique(isRollback ? NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE - : NKikimrDataEvents::TEvWrite::MODE_PREPARE); + : NKikimrDataEvents::TEvWrite::MODE_VOLATILE_PREPARE); //NKikimrDataEvents::TEvWrite::MODE_PREPARE); if (isRollback) { FillEvWriteRollback(evWrite.get(), shardId, TxManager); @@ -1484,6 +1496,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub transaction.SetTxId(*TxId); transaction.SetMinStep(commitInfo.MinStep); transaction.SetMaxStep(commitInfo.MaxStep); + transaction.SetFlags(TEvTxProxy::TEvProposeTransaction::FlagVolatile); for (const auto& shardInfo : commitInfo.ShardsInfo) { auto& item = *affectedSet.Add(); @@ -1492,7 +1505,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } //TODO: NDataIntegrity & Volatile - CA_LOG_D("Execute planned transaction, coordinator: " << commitInfo.Coordinator); + CA_LOG_D("Execute planned transaction, coordinator: " << commitInfo.Coordinator << " volitale=" << ((transaction.GetFlags() & TEvTxProxy::TEvProposeTransaction::FlagVolatile) != 0)); Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvForward(ev.Release(), commitInfo.Coordinator, /* subscribe */ true)); } @@ -1772,6 +1785,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void 
ProcessWritePreparedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { if (State != EState::PREPARING) { + CA_LOG_D("ProcessWritePreparedShard: ignored"); return; } const auto& record = ev->Get()->Record; @@ -1792,6 +1806,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void ProcessWriteCompletedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { if (State != EState::COMMITTING) { + CA_LOG_D("ProcessWriteCompletedShard: ignored"); return; } CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() @@ -1814,6 +1829,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void OnPrepared(IKqpTransactionManager::TPrepareResult&& preparedInfo, ui64 dataSize) override { if (State != EState::PREPARING) { + CA_LOG_D("OnPrepared: ignored"); return; } Y_UNUSED(preparedInfo, dataSize); @@ -1828,6 +1844,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void OnCommitted(ui64 shardId, ui64 dataSize) override { if (State != EState::COMMITTING) { + CA_LOG_D("OnCommitted: ignored"); return; } Y_UNUSED(shardId, dataSize); @@ -1950,6 +1967,7 @@ class TKqpForwardWriteActor : public TActorBootstrapped, } void Handle(TEvBufferWriteResult::TPtr& result) { + CA_LOG_D("TKqpForwardWriteActor Recv from=" << BufferActorId); EgressStats.Bytes += DataSize; EgressStats.Chunks++; EgressStats.Splits++; @@ -1963,8 +1981,11 @@ class TKqpForwardWriteActor : public TActorBootstrapped, } if (Closed) { + CA_LOG_D("TKqpForwardWriteActor FINISH"); Callbacks->OnAsyncOutputFinished(GetOutputIndex()); + return; } + CA_LOG_D("TKqpForwardWriteActor RESUME free=" << GetFreeSpace()); Callbacks->ResumeExecution(); } @@ -1998,6 +2019,7 @@ class TKqpForwardWriteActor : public TActorBootstrapped, }; } + CA_LOG_D("TKqpForwardWriteActor SEND data=" << DataSize << " closed=" << Closed); AFL_ENSURE(Send(BufferActorId, ev.release())); } @@ -2032,12 +2054,14 @@ class TKqpForwardWriteActor : public TActorBootstrapped, 
Data->emplace_back(std::move(data)); DataSize += size; + CA_LOG_D("TKqpForwardWriteActor ADD DATA : " << size << " / " << DataSize); if (Closed || GetFreeSpace() <= 0) { WriteToBuffer(); } } void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { + CA_LOG_E("TKqpForwardWriteActor ERROR : " << message); NYql::TIssue issue(message); for (const auto& i : subIssues) { issue.AddSubIssue(MakeIntrusive(i)); diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index c01458c1187e..5cdac8ec1b81 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -908,9 +908,10 @@ class TShardsInfo { void MakeNextBatches(i64 maxDataSize, ui64 maxCount) { YQL_ENSURE(BatchesInFlight == 0); i64 dataSize = 0; - while (BatchesInFlight < maxCount - && BatchesInFlight < Batches.size() - && dataSize + GetBatch(BatchesInFlight).GetMemory() <= maxDataSize) { + Y_UNUSED(dataSize, maxCount); + while (/*BatchesInFlight < maxCount + && */BatchesInFlight < Batches.size() + /*&& dataSize + GetBatch(BatchesInFlight).GetMemory() <= maxDataSize()*/) { dataSize += GetBatch(BatchesInFlight).GetMemory(); ++BatchesInFlight; } diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index fe0873c942b3..5e4a717c1417 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -321,7 +321,7 @@ class TKqpSessionActor : public TActorBootstrapped { } // TODO: support buffer actor - bool replied = ExecutePhyTx(/*tx*/ nullptr, /*commit*/ true, false); + bool replied = ExecutePhyTx(/*tx*/ nullptr, /*commit*/ true); if (!replied) { Become(&TKqpSessionActor::ExecuteState); } @@ -1093,11 +1093,10 @@ class TKqpSessionActor : public TActorBootstrapped { if (QueryState->TxCtx->ShouldExecuteDeferredEffects()) { ExecuteDeferredEffectsImmediately(); } 
else if (auto commit = QueryState->ShouldCommitWithCurrentTx(tx); commit || tx) { - ExecutePhyTx(tx, commit, false); + ExecutePhyTx(tx, commit); } else { ReplySuccess(); } - //} } void ExecuteDeferredEffectsImmediately() { @@ -1123,7 +1122,7 @@ class TKqpSessionActor : public TActorBootstrapped { SendToExecuter(QueryState->TxCtx.Get(), std::move(request)); } - bool ExecutePhyTx(const TKqpPhyTxHolder::TConstPtr& tx, bool commit, bool) { + bool ExecutePhyTx(const TKqpPhyTxHolder::TConstPtr& tx, bool commit) { if (tx) { switch (tx->GetType()) { case NKqpProto::TKqpPhyTx::TYPE_SCHEME: @@ -1363,7 +1362,11 @@ class TKqpSessionActor : public TActorBootstrapped { auto ev = std::make_unique(exId); Send(MakeTxProxyID(), ev.release()); if (!isRollback) { - Y_ABORT_UNLESS(!ExecuterId); + if (ExecuterId) { + LOG_E("ERROR KQP EXETUTER: new=" << exId << " old= " << ExecuterId << " "); + //Sleep(TDuration::Seconds(1)); + Y_ABORT_UNLESS(!ExecuterId); + } } ExecuterId = exId; } diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index d83e95cc4d4b..991d4e21dd40 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -4252,6 +4252,103 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { } } + Y_UNIT_TEST(TableSink_ReplaceDataShard_INTERACTIVE) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); + + const TString query = R"( + CREATE TABLE `/Root/DataShard` ( + Col1 Uint32 NOT NULL, + Col2 String, + Col3 Int32 NOT NULL, + PRIMARY KEY (Col1) + ) + WITH ( + AUTO_PARTITIONING_BY_SIZE = DISABLED, + 
AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 16, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 16, + UNIFORM_PARTITIONS = 16); + + CREATE TABLE `/Root/DataShard2` ( + Col1 Uint32 NOT NULL, + Col2 String, + Col3 Int32 NOT NULL, + PRIMARY KEY (Col1) + ) + WITH ( + AUTO_PARTITIONING_BY_SIZE = DISABLED, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 17, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 17, + UNIFORM_PARTITIONS = 17); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + auto client = kikimr.GetQueryClient(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto tx = session2.BeginTransaction(NYdb::NQuery::TTxSettings::SerializableRW()) + .ExtractValueSync() + .GetTransaction(); + UNIT_ASSERT(tx.IsActive()); + { + auto prepareResult = session2.ExecuteQuery(R"( + REPLACE INTO `/Root/DataShard` (Col1, Col2, Col3) VALUES + (10u, "test1", 10), (20u, "test2", 11), (2147483647u, "test3", 12), (2147483640u, NULL, 13); + )", TTxControl::Tx(tx.GetId()), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } + + { + auto prepareResult = session2.ExecuteQuery(R"( + REPLACE INTO `/Root/DataShard2` (Col1, Col2, Col3) VALUES + (11u, "test1", 10), (21u, "test2", 11), (2147483646u, "test3", 12), (2147483641u, NULL, 13); + )", TTxControl::Tx(tx.GetId()), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } + + { + auto it = session2.StreamExecuteQuery(R"( + SELECT COUNT(*) FROM `/Root/DataShard`; + )", TTxControl::Tx(tx.GetId()), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + 
TString output = StreamResultToYson(it); + CompareYson( + output, + R"([[4u]])"); + } + + { + auto prepareResult = session2.ExecuteQuery(R"( + REPLACE INTO `/Root/DataShard2` (Col1, Col2, Col3) VALUES + (11u, "test1", 10), (21u, "test2", 11), (2147483646u, "test3", 12), (2147483641u, NULL, 13); + )", TTxControl::Tx(tx.GetId()), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } + + { + auto commitResult = tx.Commit().ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); + } + + //{ + // auto prepareResult = client.ExecuteQuery(R"( + // REPLACE INTO `/Root/DataShard2` SELECT * FROM `/Root/DataShard`; + // )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + // UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + //} + + } + Y_UNIT_TEST(ReadDatashardAndColumnshard) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); diff --git a/ydb/core/tx/datashard/datashard_write_operation.cpp b/ydb/core/tx/datashard/datashard_write_operation.cpp index 700cd0b1858e..e57c8b7eef91 100644 --- a/ydb/core/tx/datashard/datashard_write_operation.cpp +++ b/ydb/core/tx/datashard/datashard_write_operation.cpp @@ -382,7 +382,12 @@ TString TWriteOperation::GetTxBody() const { } void TWriteOperation::SetTxBody(const TString& txBody) { - //Y_ABORT_UNLESS(WriteRequest); + if (WriteRequest) { + LOG_E("TWriteOperation ERROR:: " << NKikimrDataEvents::TEvWrite_ETxMode_Name(WriteRequest->Record.GetTxMode()) << " " + << NKikimrDataEvents::TKqpLocks_ELocksOp_Name(WriteRequest->Record.GetLocks().GetOp()) << " " << WriteRequest->Record.GetLocks().LocksSize() << " " << WriteRequest->Record.GetOperations().size() + << " "); + } + 
Y_ABORT_UNLESS(!WriteRequest); NKikimrTxDataShard::TSerializedEvent proto; const bool success = proto.ParseFromString(txBody); From d30d6010f044a8f7368e424004cd2ca409a4838d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 26 Sep 2024 18:54:22 +0300 Subject: [PATCH 34/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 5e4a717c1417..1aad859f5459 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1365,7 +1365,7 @@ class TKqpSessionActor : public TActorBootstrapped { if (ExecuterId) { LOG_E("ERROR KQP EXETUTER: new=" << exId << " old= " << ExecuterId << " "); //Sleep(TDuration::Seconds(1)); - Y_ABORT_UNLESS(!ExecuterId); + //Y_ABORT_UNLESS(!ExecuterId); } } ExecuterId = exId; From f627f592a693bf6adc044d8bcdd2da1b9af26bbe Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 27 Sep 2024 11:56:20 +0300 Subject: [PATCH 35/69] fix --- ydb/core/kqp/common/simple/kqp_event_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/common/simple/kqp_event_ids.h b/ydb/core/kqp/common/simple/kqp_event_ids.h index 4fc1b9b87dbd..d3c6f3f2fd28 100644 --- a/ydb/core/kqp/common/simple/kqp_event_ids.h +++ b/ydb/core/kqp/common/simple/kqp_event_ids.h @@ -45,7 +45,7 @@ struct TKqpEvents { EvListProxyNodesRequest, EvListProxyNodesResponse, EvUpdateDatabaseInfo, - EvDelayedRequestError + EvDelayedRequestError, EvBufferWrite, EvBufferWriteResult, }; From e7e70073c67924ab79e0678a9a23dc012f7d75a7 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 27 Sep 2024 14:09:02 +0300 Subject: [PATCH 36/69] disable-resharding --- ydb/core/kqp/runtime/kqp_write_table.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 
5cdac8ec1b81..034b2d189707 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -1066,6 +1066,7 @@ class TShardsInfo { class TShardedWriteController : public IShardedWriteController { public: void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) override { + Y_ABORT_UNLESS(!SchemeEntry); SchemeEntry = schemeEntry; BeforePartitioningChanged(); for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { @@ -1080,6 +1081,7 @@ class TShardedWriteController : public IShardedWriteController { void OnPartitioningChanged( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) override { + Y_ABORT_UNLESS(!SchemeEntry); SchemeEntry = schemeEntry; PartitionsEntry = std::move(partitionsEntry); BeforePartitioningChanged(); @@ -1094,6 +1096,7 @@ class TShardedWriteController : public IShardedWriteController { } void BeforePartitioningChanged() { + return; for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { auto& writeInfo = WriteInfos.at(token); if (writeInfo.Serializer) { @@ -1107,6 +1110,7 @@ class TShardedWriteController : public IShardedWriteController { } void AfterPartitioningChanged() { + return; if (!WriteInfos.empty()) { ShardsInfo.Close(); ReshardData(); @@ -1387,6 +1391,7 @@ class TShardedWriteController : public IShardedWriteController { } void ReshardData() { + return; for (auto& [_, shardInfo] : ShardsInfo.GetShards()) { for (size_t index = 0; index < shardInfo.Size(); ++index) { const auto& batch = shardInfo.GetBatch(index); From 39e25cc3e10235117ae8d1adf7b8502c5ad1981a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 27 Sep 2024 16:25:09 +0300 Subject: [PATCH 37/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 
1aad859f5459..4886f4c4d353 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1978,6 +1978,9 @@ class TKqpSessionActor : public TActorBootstrapped { } void Reply() { + if (!QueryState) { + return; + } YQL_ENSURE(QueryState); YQL_ENSURE(Counters); From b100c2f43b1e8a3ca04cc00da24a1d354c27fab1 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 30 Sep 2024 16:57:08 +0300 Subject: [PATCH 38/69] improvements-control --- ydb/core/kqp/common/kqp_tx_manager.cpp | 38 ++++++---- ydb/core/kqp/common/kqp_tx_manager.h | 1 + .../kqp/executer_actor/kqp_data_executer.cpp | 44 ++++++----- .../kqp/executer_actor/kqp_executer_impl.h | 2 +- ydb/core/kqp/executer_actor/kqp_planner.cpp | 2 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 1 - ydb/core/kqp/session_actor/kqp_query_state.h | 15 ++-- .../kqp/session_actor/kqp_session_actor.cpp | 75 +++++++------------ ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 20 ++--- .../datashard/datashard_write_operation.cpp | 7 -- 10 files changed, 93 insertions(+), 112 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 9df30136a432..931978a7242f 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -12,7 +12,7 @@ struct TKqpLock { TKey GetKey() const { return std::make_tuple(Proto.GetLockId(), Proto.GetDataShard(), Proto.GetSchemeShard(), Proto.GetPathId()); } bool Invalidated(const TKqpLock& newLock) const { - YQL_ENSURE(GetKey() == newLock.GetKey()); + AFL_ENSURE(GetKey() == newLock.GetKey()); return Proto.GetGeneration() != newLock.Proto.GetGeneration() || Proto.GetCounter() != newLock.Proto.GetCounter(); } @@ -91,7 +91,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { TStringBuilder message; message << "Transaction locks invalidated. 
Tables: "; bool first = true; - // TODO: add error by lock key (pathid) + // TODO: add error by pathid for (const auto& path : shardInfo.Pathes) { if (!first) { message << ", "; @@ -102,7 +102,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { LocksIssue = YqlIssue(NYql::TPosition(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message); return false; } - YQL_ENSURE(false); + AFL_ENSURE(false); } return true; @@ -173,6 +173,15 @@ class TKqpTransactionManager : public IKqpTransactionManager { return GetShardsCount() == 0; } + bool HasLocks() const override { + for (const auto& [_, shardInfo] : ShardsInfo) { + if (!shardInfo.Locks.empty()) { + return true; + } + } + return false; + } + bool IsVolatile() const override { return true; } @@ -202,7 +211,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { } void StartPrepare() override { - YQL_ENSURE(!CollectOnly); + AFL_ENSURE(!CollectOnly); AFL_ENSURE(State == ETransactionState::COLLECTING); AFL_ENSURE(!IsReadOnly()); @@ -223,8 +232,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { Y_ABORT_UNLESS(!ReceivingShards.empty()); - //ui64 arbiter = 0; - const size_t minArbiterMeshSize = 5; // TODO: make configurable? 
+ constexpr size_t minArbiterMeshSize = 5; if ((IsVolatile() && ReceivingShards.size() >= minArbiterMeshSize)) { @@ -243,7 +251,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { } } - ShardsToWait = ShardsIds; + ShardsToWaitPrepare = ShardsIds; MinStep = std::numeric_limits::min(); MaxStep = std::numeric_limits::max(); @@ -270,7 +278,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { AFL_ENSURE(shardInfo.State == EShardState::PREPARING); shardInfo.State = EShardState::PREPARED; - ShardsToWait.erase(result.ShardId); + ShardsToWaitPrepare.erase(result.ShardId); MinStep = std::max(MinStep, result.MinStep); MaxStep = std::min(MaxStep, result.MaxStep); @@ -281,11 +289,11 @@ class TKqpTransactionManager : public IKqpTransactionManager { AFL_ENSURE(Coordinator && Coordinator == result.Coordinator)("prev_coordinator", Coordinator)("new_coordinator", result.Coordinator); - return ShardsToWait.empty(); + return ShardsToWaitPrepare.empty(); } void StartExecute() override { - YQL_ENSURE(!CollectOnly); + AFL_ENSURE(!CollectOnly); AFL_ENSURE(State == ETransactionState::PREPARING || (State == ETransactionState::COLLECTING && IsSingleShard())); @@ -299,7 +307,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { shardInfo.State = EShardState::EXECUTING; } - ShardsToWait = ReceivingShards; + AFL_ENSURE(ReceivingShards.empty() || !IsSingleShard()); } TCommitInfo GetCommitInfo() override { @@ -326,11 +334,9 @@ class TKqpTransactionManager : public IKqpTransactionManager { AFL_ENSURE(shardInfo.State == EShardState::EXECUTING); shardInfo.State = EShardState::FINISHED; - Y_ABORT_UNLESS(ShardsToWait.empty() || !IsSingleShard()); - // Either all shards committed or all shards failed, - // so we need to wait only for one answer. - return ShardsToWait.contains(shardId) || ShardsToWait.empty(); + // so we need to wait only for one answer from ReceivingShards. 
+ return ReceivingShards.contains(shardId) || IsSingleShard(); } private: @@ -365,7 +371,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { THashSet ReceivingShards; std::optional Arbiter; - THashSet ShardsToWait; + THashSet ShardsToWaitPrepare; ui64 MinStep = 0; ui64 MaxStep = 0; diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index 60a16a4c824d..2fb48ecc6562 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -53,6 +53,7 @@ class IKqpTransactionManager { virtual bool IsReadOnly() const = 0; virtual bool IsSingleShard() const = 0; virtual bool IsEmpty() const = 0; + virtual bool HasLocks() const = 0; virtual bool IsVolatile() const = 0; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 0641c0d21907..184fac4b8091 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -139,7 +139,6 @@ class TKqpDataExecuter : public TKqpExecuterBaseAdd(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); if (TxManager) { YQL_ENSURE(stageInfo.Meta.TableKind == ETableKind::Olap); - // TODO: + TxManager->AddShard(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); + TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::READ); + TxManager->AddLock(lock.GetDataShard(), lock); } } } @@ -277,18 +279,10 @@ class TKqpDataExecuter : public TKqpExecuterBaseSnapshot = GetSnapshot(); if (TxManager) { TxManager->SetHasSnapshot(GetSnapshot().IsValid()); } - if (!Locks.empty() || (TxManager && !TxManager->IsEmpty())) { - if (LockHandle) { - ResponseEv->LockHandle = std::move(LockHandle); - } - BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks); - } - if (!BufferActorId || (ReadOnlyTx && Request.LocksOp != 
ELocksOp::Rollback)) { Become(&TKqpDataExecuter::FinalizeState); LOG_D("DON'T SEND ANYTHING " << BufferActorId); @@ -334,6 +328,15 @@ class TKqpDataExecuter : public TKqpExecuterBaseSnapshot = GetSnapshot(); + + if (!Locks.empty() || (TxManager && TxManager->HasLocks())) { + if (LockHandle) { + ResponseEv->LockHandle = std::move(LockHandle); + } + BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks); + } + auto resultSize = ResponseEv->GetByteSize(); if (resultSize > (int)ReplySizeLimit) { TString message; @@ -985,6 +988,7 @@ class TKqpDataExecuter : public TKqpExecuterBase(); @@ -1538,7 +1543,7 @@ class TKqpDataExecuter : public TKqpExecuterBase TTask& { - YQL_ENSURE(!UseEvWriteForOltp); + YQL_ENSURE(!TxManager); auto it = shardTasks.find(shardId); if (it != shardTasks.end()) { return TasksGraph.GetTask(it->second); @@ -1678,7 +1683,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseGet(shardId).IsOlap) { + if (TxManager || ShardIdToTableInfo->Get(shardId).IsOlap) { if (auto it = evWriteTxs.find(shardId); it != evWriteTxs.end()) { locks = it->second->MutableLocks(); } else { @@ -2725,7 +2731,9 @@ class TKqpDataExecuter : public TKqpExecuterBase writeId; if (Request.TopicOperations.HasWriteId()) { writeId = Request.TopicOperations.GetWriteId(); @@ -2927,7 +2934,6 @@ class TKqpDataExecuter : public TKqpExecuterBase FederatedQuerySetup; const TGUCSettings::TPtr GUCSettings; TShardIdToTableInfoPtr ShardIdToTableInfo; diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h b/ydb/core/kqp/executer_actor/kqp_executer_impl.h index bd3caadfa7b2..469735757373 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h @@ -511,7 +511,7 @@ class TKqpExecuterBase : public TActorBootstrapped { } if (BufferActorId && Request.LocksOp == ELocksOp::Rollback) { - //YQL_ENSURE(Request.Transactions.empty()); + YQL_ENSURE(Request.Transactions.empty()); static_cast(this)->Finalize(); 
return; } diff --git a/ydb/core/kqp/executer_actor/kqp_planner.cpp b/ydb/core/kqp/executer_actor/kqp_planner.cpp index a508b695df0b..ba6cc4adacff 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner.cpp +++ b/ydb/core/kqp/executer_actor/kqp_planner.cpp @@ -546,7 +546,7 @@ std::unique_ptr TKqpPlanner::PlanExecution() { ComputeTasks.clear(); } - if ((nComputeTasks == 0 && TasksPerNode.size() == 1 && (AsyncIoFactory != nullptr) && AllowSinglePartitionOpt)) { + if (nComputeTasks == 0 && TasksPerNode.size() == 1 && (AsyncIoFactory != nullptr) && AllowSinglePartitionOpt) { // query affects a single key or shard, so it might be more effective // to execute this task locally so we can avoid useless overhead for remote task launching. for (auto& [shardId, tasks]: TasksPerNode) { diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 571aded4971f..e289956789c6 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -960,7 +960,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu } void Bootstrap() { - Y_ABORT_UNLESS(false); LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; WriteTableActor = new TKqpTableWriteActor( diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index b11ac32fa751..b0b3378328c0 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -347,10 +347,10 @@ class TKqpQueryState : public TNonCopyable { return true; } - /*if (HasTxSinkInTx(tx)) { - // Sink results can't be committed with changes + if (TxCtx->HasOlapTable) { + // Olap sink results can't be committed with changes return false; - }*/ + } if (TxCtx->HasOlapTable) { // HTAP/OLAP transactions always use separate commit. 
@@ -372,12 +372,13 @@ class TKqpQueryState : public TNonCopyable { } bool ShouldAcquireLocks(const TKqpPhyTxHolder::TConstPtr& tx) { + Y_UNUSED(tx); if (*TxCtx->EffectiveIsolationLevel != NKikimrKqp::ISOLATION_LEVEL_SERIALIZABLE) { return false; } // Inconsistent writes (CTAS) don't require locks. - if (IsSplitted() && !HasTxSinkInTx(tx)) { + if (IsSplitted()) { return false; } @@ -418,7 +419,7 @@ class TKqpQueryState : public TNonCopyable { if (TxCtx->CanDeferEffects()) { // At current time sinks require separate tnx with commit. - while (tx && tx->GetHasEffects() /*&& !HasTxSinkInTx(tx)*/) { + while (tx && tx->GetHasEffects() && TxCtx->HasOlapTable) { QueryData->CreateKqpValueMap(tx); bool success = TxCtx->AddDeferredEffect(tx, QueryData); YQL_ENSURE(success); @@ -435,7 +436,7 @@ class TKqpQueryState : public TNonCopyable { return tx; } - bool HasTxSinkInStage(const ::NKqpProto::TKqpPhyStage& stage) const { + /*bool HasOnlyInonsistentSinkInStage(const ::NKqpProto::TKqpPhyStage& stage) const { for (const auto& sink : stage.GetSinks()) { if (sink.GetTypeCase() == NKqpProto::TKqpSink::kInternalSink && sink.GetInternalSink().GetSettings().Is()) { NKikimrKqp::TKqpTableSinkSettings settings; @@ -467,7 +468,7 @@ class TKqpQueryState : public TNonCopyable { } } return false; - } + }*/ bool HasTxControl() const { return RequestEv->HasTxControl(); diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 4886f4c4d353..a6136e929474 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -320,7 +320,6 @@ class TKqpSessionActor : public TActorBootstrapped { return; } - // TODO: support buffer actor bool replied = ExecutePhyTx(/*tx*/ nullptr, /*commit*/ true); if (!replied) { Become(&TKqpSessionActor::ExecuteState); @@ -1037,7 +1036,9 @@ class TKqpSessionActor : public TActorBootstrapped { bool CheckTransactionLocks(const TKqpPhyTxHolder::TConstPtr& tx) 
{ auto& txCtx = *QueryState->TxCtx; - const bool broken = txCtx.TxManager ? !!txCtx.TxManager->GetLockIssue() : txCtx.Locks.Broken(); + const bool broken = txCtx.TxManager + ? !!txCtx.TxManager->GetLockIssue() + : txCtx.Locks.Broken(); if (!txCtx.DeferredEffects.Empty() && broken) { ReplyQueryError(Ydb::StatusIds::ABORTED, "tx has deferred effects, but locks are broken", @@ -1100,7 +1101,7 @@ class TKqpSessionActor : public TActorBootstrapped { } void ExecuteDeferredEffectsImmediately() { - //YQL_ENSURE(QueryState->TxCtx->ShouldExecuteDeferredEffects()); + YQL_ENSURE(QueryState->TxCtx->ShouldExecuteDeferredEffects()); auto& txCtx = *QueryState->TxCtx; auto request = PrepareRequest(/* tx */ nullptr, /* literal */ false, QueryState.get()); @@ -1154,9 +1155,9 @@ class TKqpSessionActor : public TActorBootstrapped { auto& txCtx = *QueryState->TxCtx; bool literal = tx && tx->IsLiteralTx(); + const bool hasLocks = txCtx.TxManager ? txCtx.TxManager->HasLocks() : txCtx.Locks.HasLocks(); if (commit) { - const bool hasLocks = txCtx.TxManager ? !txCtx.TxManager->IsEmpty() : txCtx.Locks.HasLocks(); if (txCtx.TxHasEffects() || hasLocks || txCtx.TopicOperations.HasOperations()) { // Cannot perform commit in literal execution literal = false; @@ -1221,10 +1222,10 @@ class TKqpSessionActor : public TActorBootstrapped { request.PerShardKeysSizeLimitBytes = Config->_CommitPerShardKeysSizeLimitBytes.Get().GetRef(); } - request.AcquireLocksTxId = txCtx.Locks.GetLockTxId(); - if (Settings.TableService.GetEnableOltpSink()) { - const bool hasLocks = txCtx.TxManager ? !txCtx.TxManager->IsEmpty() : false; + //if (txCtx.TxHasEffects()) { + request.AcquireLocksTxId = txCtx.Locks.GetLockTxId(); + //} if (hasLocks) { if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { @@ -1239,8 +1240,8 @@ class TKqpSessionActor : public TActorBootstrapped { request.LocksOp = ELocksOp::Commit; } } else { - const bool hasLocks = txCtx.TxManager ? 
!txCtx.TxManager->IsEmpty() : txCtx.Locks.HasLocks(); - if (hasLocks || txCtx.TopicOperations.HasOperations() || Settings.TableService.GetEnableOltpSink()) { + const bool hasLocks = txCtx.TxManager ? !txCtx.TxManager->HasLocks() : txCtx.Locks.HasLocks(); + if (hasLocks || txCtx.TopicOperations.HasOperations()) { if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { LOG_D("TExecPhysicalRequest, tx has commit locks"); request.LocksOp = ELocksOp::Commit; @@ -1248,33 +1249,14 @@ class TKqpSessionActor : public TActorBootstrapped { LOG_D("TExecPhysicalRequest, tx has rollback locks"); request.LocksOp = ELocksOp::Rollback; } - - if (!Settings.TableService.GetEnableOltpSink()) { - for (auto& [lockId, lock] : txCtx.Locks.LocksMap) { - auto dsLock = ExtractLock(lock.GetValueRef(txCtx.Locks.LockType)); - request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); - } - } else { - // TODO: support for non buffer actor writes + for (auto& [lockId, lock] : txCtx.Locks.LocksMap) { + auto dsLock = ExtractLock(lock.GetValueRef(txCtx.Locks.LockType)); + request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); } } } request.TopicOperations = std::move(txCtx.TopicOperations); - /*} else if (executeDeferred && !txCtx.DeferredEffects.Empty()) { - for (const auto& effect : txCtx.DeferredEffects) { - request.Transactions.emplace_back(effect.PhysicalTx, effect.Params); - - LOG_D("TExecPhysicalRequest, add DeferredEffect to Transaction," - << " current Transactions.size(): " << request.Transactions.size()); - } - - request.PerShardKeysSizeLimitBytes = Config->_CommitPerShardKeysSizeLimitBytes.Get().GetRef(); - request.AcquireLocksTxId = txCtx.Locks.GetLockTxId(); - request.UseImmediateEffects = true; - - txCtx.HasImmediateEffects = true; - txCtx.ClearDeferredEffects();*/ } else if (QueryState->ShouldAcquireLocks(tx) && (!txCtx.HasOlapTable || Settings.TableService.GetEnableOlapSink())) { request.AcquireLocksTxId = 
txCtx.Locks.GetLockTxId(); @@ -1338,7 +1320,7 @@ class TKqpSessionActor : public TActorBootstrapped { request.ResourceManager_ = ResourceManager_; LOG_D("Sending to Executer TraceId: " << request.TraceId.GetTraceId() << " " << request.TraceId.GetSpanIdSize()); - if (Settings.TableService.GetEnableOltpSink()) { + if (Settings.TableService.GetEnableOltpSink() && !txCtx->TxManager) { txCtx->TxManager = CreateKqpTransactionManager(); } if (Settings.TableService.GetEnableOltpSink() && !txCtx->BufferActorId) { @@ -1362,11 +1344,7 @@ class TKqpSessionActor : public TActorBootstrapped { auto ev = std::make_unique(exId); Send(MakeTxProxyID(), ev.release()); if (!isRollback) { - if (ExecuterId) { - LOG_E("ERROR KQP EXETUTER: new=" << exId << " old= " << ExecuterId << " "); - //Sleep(TDuration::Seconds(1)); - //Y_ABORT_UNLESS(!ExecuterId); - } + Y_ABORT_UNLESS(!ExecuterId); } ExecuterId = exId; } @@ -1516,12 +1494,14 @@ class TKqpSessionActor : public TActorBootstrapped { // Invalidate query cache on scheme/internal errors switch (status) { case Ydb::StatusIds::ABORTED: { - if (ev->BrokenLockPathId && !QueryState->TxCtx->TxManager) { + if (QueryState->TxCtx->TxManager && QueryState->TxCtx->TxManager->BrokenLocks()) { + issues.AddIssue(*QueryState->TxCtx->TxManager->GetLockIssue()); + } else if (ev->BrokenLockPathId) { + YQL_ENSURE(!QueryState->TxCtx->TxManager); issues.AddIssue(GetLocksInvalidatedIssue(*QueryState->TxCtx, *ev->BrokenLockPathId)); - } else if (ev->BrokenLockShardId && !QueryState->TxCtx->TxManager) { + } else if (ev->BrokenLockShardId) { + YQL_ENSURE(!QueryState->TxCtx->TxManager); issues.AddIssue(GetLocksInvalidatedIssue(*QueryState->TxCtx->ShardIdToTableInfo, *ev->BrokenLockShardId)); - } else if (QueryState->TxCtx->TxManager && QueryState->TxCtx->TxManager->BrokenLocks()) { - issues.AddIssue(*QueryState->TxCtx->TxManager->GetLockIssue()); } break; } @@ -1978,10 +1958,7 @@ class TKqpSessionActor : public TActorBootstrapped { } void Reply() { - if 
(!QueryState) { - return; - } - YQL_ENSURE(QueryState); + Y_ABORT_UNLESS(QueryState); YQL_ENSURE(Counters); auto& record = QueryResponse->Record.GetRef(); @@ -2125,15 +2102,14 @@ class TKqpSessionActor : public TActorBootstrapped { request.LocksOp = ELocksOp::Rollback; - // Should tx with empty LocksMap be aborted? if (!txCtx->TxManager) { + // Should tx with empty LocksMap be aborted? for (auto& [lockId, lock] : txCtx->Locks.LocksMap) { auto dsLock = ExtractLock(lock.GetValueRef(txCtx->Locks.LockType)); request.DataShardLocks[dsLock.GetDataShard()].emplace_back(dsLock); } - } else { - // TODO: support buffer actor } + SendToExecuter(txCtx, std::move(request), true); } @@ -2143,7 +2119,7 @@ class TKqpSessionActor : public TActorBootstrapped { QueryState->TxCtx->Locks.Clear(); QueryState->TxCtx->Finish(); - QueryState->TxCtx->TxManager = nullptr; + QueryState->TxCtx->TxManager.reset(); if (QueryState->TxCtx->BufferActorId) { Send(QueryState->TxCtx->BufferActorId, new TEvKqpBuffer::TEvTerminate{}); QueryState->TxCtx->BufferActorId = {}; @@ -2523,7 +2499,6 @@ class TKqpSessionActor : public TActorBootstrapped { hFunc(TEvents::TEvUndelivered, HandleNoop); hFunc(TEvKqpSnapshot::TEvCreateSnapshotResponse, Handle); hFunc(NWorkload::TEvContinueRequest, HandleNoop); - hFunc(TEvKqpBuffer::TEvError, Handle); } } catch (const yexception& ex) { InternalError(ex.what()); diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index 991d4e21dd40..efee6ba58a12 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -3623,7 +3623,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { } } - Y_UNIT_TEST(TableSink_ReplaceColumnShard) { + Y_UNIT_TEST(TableSink_Olap_Replace) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); auto settings = TKikimrSettings() @@ -4152,7 +4152,8 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { } } - 
Y_UNIT_TEST_TWIN(TableSink_ReplaceDataShard, UseSink) { + Y_UNIT_TEST_TWIN(TableSink_Oltp_Replace, UseSink) { + //UseSink = true; NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOlapSink(UseSink); appConfig.MutableTableServiceConfig()->SetEnableOltpSink(UseSink); @@ -4252,7 +4253,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { } } - Y_UNIT_TEST(TableSink_ReplaceDataShard_INTERACTIVE) { + Y_UNIT_TEST(TableSink_OltpInteractive) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); appConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); @@ -4340,13 +4341,12 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); } - //{ - // auto prepareResult = client.ExecuteQuery(R"( - // REPLACE INTO `/Root/DataShard2` SELECT * FROM `/Root/DataShard`; - // )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); - // UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); - //} - + { + auto prepareResult = client.ExecuteQuery(R"( + REPLACE INTO `/Root/DataShard2` SELECT * FROM `/Root/DataShard`; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } } Y_UNIT_TEST(ReadDatashardAndColumnshard) { diff --git a/ydb/core/tx/datashard/datashard_write_operation.cpp b/ydb/core/tx/datashard/datashard_write_operation.cpp index e57c8b7eef91..5581882efdbb 100644 --- a/ydb/core/tx/datashard/datashard_write_operation.cpp +++ b/ydb/core/tx/datashard/datashard_write_operation.cpp @@ -382,13 +382,6 @@ TString TWriteOperation::GetTxBody() const { } void TWriteOperation::SetTxBody(const TString& txBody) { - if (WriteRequest) { - 
LOG_E("TWriteOperation ERROR:: " << NKikimrDataEvents::TEvWrite_ETxMode_Name(WriteRequest->Record.GetTxMode()) << " " - << NKikimrDataEvents::TKqpLocks_ELocksOp_Name(WriteRequest->Record.GetLocks().GetOp()) << " " << WriteRequest->Record.GetLocks().LocksSize() << " " << WriteRequest->Record.GetOperations().size() - << " "); - } - Y_ABORT_UNLESS(!WriteRequest); - NKikimrTxDataShard::TSerializedEvent proto; const bool success = proto.ParseFromString(txBody); Y_ABORT_UNLESS(success); From 26ed62ec9eef17e0536fccf304e1d67f216a96ae Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 30 Sep 2024 17:00:43 +0300 Subject: [PATCH 39/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index a6136e929474..1c9c0268c466 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1223,9 +1223,9 @@ class TKqpSessionActor : public TActorBootstrapped { } if (Settings.TableService.GetEnableOltpSink()) { - //if (txCtx.TxHasEffects()) { + if (txCtx.TxHasEffects() || hasLocks || txCtx.TopicOperations.HasOperations()) { request.AcquireLocksTxId = txCtx.Locks.GetLockTxId(); - //} + } if (hasLocks) { if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { From a01699f692216cad92606caeefee30bb206c4bb6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 30 Sep 2024 18:33:49 +0300 Subject: [PATCH 40/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index e289956789c6..45705567c604 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -1503,7 +1503,7 @@ class TKqpBufferWriteActor :public 
TActorBootstrapped, pub item.SetFlags(shardInfo.AffectedFlags); } - //TODO: NDataIntegrity & Volatile + //TODO: NDataIntegrity CA_LOG_D("Execute planned transaction, coordinator: " << commitInfo.Coordinator << " volitale=" << ((transaction.GetFlags() & TEvTxProxy::TEvProposeTransaction::FlagVolatile) != 0)); Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvForward(ev.Release(), commitInfo.Coordinator, /* subscribe */ true)); } @@ -1854,7 +1854,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Y_ABORT_UNLESS(GetTotalMemory() == 0); return; } - //Process(); } void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) override { From 0b46a19e65fc125741bce7bf0ef7f40aabe18336 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 1 Oct 2024 12:17:06 +0300 Subject: [PATCH 41/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 1c9c0268c466..51912e7aee39 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1049,6 +1049,7 @@ class TKqpSessionActor : public TActorBootstrapped { if (tx && tx->GetHasEffects() && broken) { ReplyQueryError(Ydb::StatusIds::ABORTED, "tx has effects, but locks are broken", MessageFromIssues(std::vector{txCtx.TxManager ? 
*txCtx.TxManager->GetLockIssue() : txCtx.Locks.GetIssue()})); + return false; } return true; From 2a744129aadd7b860fbe0ed84f1bd8de2037bc61 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 1 Oct 2024 13:55:16 +0300 Subject: [PATCH 42/69] return-batching --- ydb/core/kqp/runtime/kqp_write_table.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 034b2d189707..c01458c1187e 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -908,10 +908,9 @@ class TShardsInfo { void MakeNextBatches(i64 maxDataSize, ui64 maxCount) { YQL_ENSURE(BatchesInFlight == 0); i64 dataSize = 0; - Y_UNUSED(dataSize, maxCount); - while (/*BatchesInFlight < maxCount - && */BatchesInFlight < Batches.size() - /*&& dataSize + GetBatch(BatchesInFlight).GetMemory() <= maxDataSize()*/) { + while (BatchesInFlight < maxCount + && BatchesInFlight < Batches.size() + && dataSize + GetBatch(BatchesInFlight).GetMemory() <= maxDataSize) { dataSize += GetBatch(BatchesInFlight).GetMemory(); ++BatchesInFlight; } @@ -1066,7 +1065,6 @@ class TShardsInfo { class TShardedWriteController : public IShardedWriteController { public: void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) override { - Y_ABORT_UNLESS(!SchemeEntry); SchemeEntry = schemeEntry; BeforePartitioningChanged(); for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { @@ -1081,7 +1079,6 @@ class TShardedWriteController : public IShardedWriteController { void OnPartitioningChanged( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) override { - Y_ABORT_UNLESS(!SchemeEntry); SchemeEntry = schemeEntry; PartitionsEntry = std::move(partitionsEntry); BeforePartitioningChanged(); @@ -1096,7 +1093,6 @@ class TShardedWriteController : public IShardedWriteController { } 
void BeforePartitioningChanged() { - return; for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { auto& writeInfo = WriteInfos.at(token); if (writeInfo.Serializer) { @@ -1110,7 +1106,6 @@ class TShardedWriteController : public IShardedWriteController { } void AfterPartitioningChanged() { - return; if (!WriteInfos.empty()) { ShardsInfo.Close(); ReshardData(); @@ -1391,7 +1386,6 @@ class TShardedWriteController : public IShardedWriteController { } void ReshardData() { - return; for (auto& [_, shardInfo] : ShardsInfo.GetShards()) { for (size_t index = 0; index < shardInfo.Size(); ++index) { const auto& batch = shardInfo.GetBatch(index); From da5a44e4a3db36516d280a5dc91914645d277040 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 1 Oct 2024 15:55:40 +0300 Subject: [PATCH 43/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 45705567c604..7c872a6aa123 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -68,7 +68,6 @@ namespace { for (const ui64 receivingShardId : prepareSettings.ReceivingShards) { protoLocks->AddReceivingShards(receivingShardId); } - //Y_ABORT_UNLESS(prepareSettings.Arbiter); if (prepareSettings.Arbiter) { protoLocks->SetArbiterShard(*prepareSettings.Arbiter); } @@ -124,7 +123,7 @@ struct IKqpTableWriterCallbacks { // EvWrite statuses virtual void OnPrepared(IKqpTransactionManager::TPrepareResult&& preparedInfo, ui64 dataSize) = 0; virtual void OnCommitted(ui64 shardId, ui64 dataSize) = 0; - virtual void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) = 0; + virtual void OnMessageAcknowledged(ui64 dataSize) = 0; virtual void OnError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues) = 0; }; @@ -714,7 +713,7 @@ class 
TKqpTableWriteActor : public TActorBootstrapped { if (result && result->IsShardEmpty && Mode == EMode::IMMEDIATE_COMMIT) { Callbacks->OnCommitted(ev->Get()->Record.GetOrigin(), result->DataSize); } else if (result) { - Callbacks->OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), SchemeEntry->TableId, result->DataSize, false); + Callbacks->OnMessageAcknowledged(result->DataSize); } } @@ -1094,8 +1093,7 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu AFL_ENSURE(false); } - void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) override { - Y_UNUSED(shardId, tableId, hasRead); + void OnMessageAcknowledged(ui64 dataSize) override { EgressStats.Bytes += dataSize; EgressStats.Chunks++; EgressStats.Splits++; @@ -1856,8 +1854,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } } - void OnMessageAcknowledged(ui64 shardId, TTableId tableId, ui64 dataSize, bool hasRead) override { - Y_UNUSED(dataSize, shardId, tableId, hasRead); // TODO: delete unused + void OnMessageAcknowledged(ui64 dataSize) override { + Y_UNUSED(dataSize); Process(); } From 3de12ed98f7336f35e51ba1698797240211018d8 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 1 Oct 2024 23:26:02 +0300 Subject: [PATCH 44/69] fix --- ydb/core/kqp/common/kqp_tx.h | 2 + ydb/core/kqp/runtime/kqp_write_actor.cpp | 68 ++++++++----------- ydb/core/kqp/runtime/kqp_write_table.cpp | 7 ++ ydb/core/kqp/runtime/kqp_write_table.h | 1 + .../kqp/session_actor/kqp_session_actor.cpp | 18 +++-- 5 files changed, 53 insertions(+), 43 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx.h b/ydb/core/kqp/common/kqp_tx.h index b4a4b6f9accb..ed4c8f798c13 100644 --- a/ydb/core/kqp/common/kqp_tx.h +++ b/ydb/core/kqp/common/kqp_tx.h @@ -197,6 +197,8 @@ class TKqpTransactionContext : public NYql::TKikimrTransactionContextBase { void Finish() final { YQL_ENSURE(DeferredEffects.Empty()); YQL_ENSURE(!Locks.HasLocks()); + YQL_ENSURE(!TxManager); + YQL_ENSURE(!BufferActorId); 
FinishTime = TInstant::Now(); diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 7c872a6aa123..cee53deb823c 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -72,7 +72,6 @@ namespace { protoLocks->SetArbiterShard(*prepareSettings.Arbiter); } } else if (prepareSettings.ArbiterColumnShard == shardId) { - Y_ABORT_UNLESS(false); protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); for (const ui64 sendingShardId : prepareSettings.SendingShards) { protoLocks->AddSendingShards(sendingShardId); @@ -81,7 +80,6 @@ namespace { protoLocks->AddReceivingShards(receivingShardId); } } else { - Y_ABORT_UNLESS(false); protoLocks->SetArbiterColumnShard(*prepareSettings.ArbiterColumnShard); protoLocks->AddSendingShards(*prepareSettings.ArbiterColumnShard); protoLocks->AddReceivingShards(*prepareSettings.ArbiterColumnShard); @@ -294,7 +292,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void UpdateShards() { - // Maybe there are better ways to initialize shards... + // TODO: Maybe there are better ways to initialize new shards... 
for (const auto& shardInfo : ShardedWriteController->GetPendingShards()) { TxManager->AddShard(shardInfo.ShardId, IsOlap(), TablePath); TxManager->AddAction(shardInfo.ShardId, IKqpTransactionManager::EAction::WRITE); @@ -335,7 +333,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { hFunc(TEvPrivate::TEvTerminate, Handle); } } catch (const yexception& e) { - CA_LOG_E(e.what()); + CA_LOG_W(e.what()); } } @@ -384,11 +382,12 @@ class TKqpTableWriteActor : public TActorBootstrapped { entry.ShowPrivatePath = true; request->ResultSet.emplace_back(entry); - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {})); + //Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {})); Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request)); } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + YQL_ENSURE(!SchemeRequest || InconsistentTx); auto& resultSet = ev->Get()->Request->ResultSet; YQL_ENSURE(resultSet.size() == 1); @@ -677,19 +676,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { }()); for (const auto& lock : ev->Get()->Record.GetTxLocks()) { - if (Mode != EMode::WRITE) { - CA_LOG_D("ERROR HERE TEST MODE" << static_cast(Mode) << " Got completed result TxId=" << ev->Get()->Record.GetTxId() - << ", TabletId=" << ev->Get()->Record.GetOrigin() - << ", Cookie=" << ev->Cookie - << ", Locks=" << [&]() { - TStringBuilder builder; - for (const auto& lock : ev->Get()->Record.GetTxLocks()) { - builder << lock.ShortDebugString(); - } - return builder; - }()); - Y_ABORT_UNLESS(false); - } Y_ABORT_UNLESS(Mode == EMode::WRITE); if (!TxManager->AddLock(ev->Get()->Record.GetOrigin(), lock)) { YQL_ENSURE(TxManager->BrokenLocks()); @@ -776,7 +762,9 @@ class TKqpTableWriteActor : public TActorBootstrapped { auto evWrite = std::make_unique(); evWrite->Record.SetTxMode(isPrepare - ? 
NKikimrDataEvents::TEvWrite::MODE_VOLATILE_PREPARE //NKikimrDataEvents::TEvWrite::MODE_PREPARE + ? (TxManager->IsVolatile() + ? NKikimrDataEvents::TEvWrite::MODE_VOLATILE_PREPARE + : NKikimrDataEvents::TEvWrite::MODE_PREPARE) : NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); if (isImmediateCommit) { @@ -838,7 +826,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { CA_LOG_D("Retry failed: not found ShardID=" << shardId << " with Cookie=" << ifCookieEqual.value_or(0)); return; } - Y_ABORT_UNLESS(false); CA_LOG_D("Retry ShardID=" << shardId << " with Cookie=" << ifCookieEqual.value_or(0)); SendDataToShard(shardId); @@ -1071,7 +1058,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu void PassAway() override { WriteTableActor->Terminate(); - //TODO: wait for writer actors? TActorBootstrapped::PassAway(); } @@ -1288,8 +1274,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub ProcessAckQueue(); if (State == EState::FLUSHING) { - //Y_ABORT_UNLESS(DataQueues.empty()); - //Y_ABORT_UNLESS(AckQueue.empty()); bool isEmpty = true; for (auto& [_, info] : WriteInfos) { isEmpty = isEmpty && info.WriteTableActor->IsReady() && info.WriteTableActor->IsEmpty(); @@ -1338,20 +1322,22 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void ProcessAckQueue() { while (!AckQueue.empty()) { - //const auto& item = AckQueue.front(); - //if (GetTotalFreeSpace() >= item.DataSize) { + const auto& item = AckQueue.front(); + if (GetTotalFreeSpace() >= item.DataSize) { auto result = std::make_unique(); result->Token = AckQueue.front().Token; CA_LOG_D("ProcessAckQueue ACK" << AckQueue.front().ForwardActorId); Send(AckQueue.front().ForwardActorId, result.release()); AckQueue.pop(); - //} else { - // return; - //} + } else { + Y_ABORT_UNLESS(false); + return; + } } } void ProcessWrite() { + // TODO: early flush //Y_ABORT_UNLESS(GetTotalFreeSpace() <= 0); const bool needToFlush = /*GetTotalFreeSpace() <= 0*/ false || State == EState::FLUSHING @@ -1447,7 +1433,9 
@@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } auto evWrite = std::make_unique(isRollback ? NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE - : NKikimrDataEvents::TEvWrite::MODE_VOLATILE_PREPARE); //NKikimrDataEvents::TEvWrite::MODE_PREPARE); + : (TxManager->IsVolatile() + ? NKikimrDataEvents::TEvWrite::MODE_VOLATILE_PREPARE + : NKikimrDataEvents::TEvWrite::MODE_PREPARE)); if (isRollback) { FillEvWriteRollback(evWrite.get(), shardId, TxManager); @@ -1546,9 +1534,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void PassAway() override { - if (State != EState::FINISHED) { - Rollback(); - } for (auto& [_, queue] : DataQueues) { while (!queue.empty()) { auto& message = queue.front(); @@ -1871,12 +1856,18 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void ReplyErrorAndDie(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { - CA_LOG_E(message << ". statusCode=" << NYql::NDqProto::StatusIds_StatusCode_Name(statusCode) << ". subIssues=" << subIssues.ToString()); - Send(SessionActorId, new TEvKqpBuffer::TEvError{ - message, - statusCode, - subIssues, - }); + CA_LOG_E(message << ". statusCode=" << NYql::NDqProto::StatusIds_StatusCode_Name(statusCode) << ". subIssues=" << subIssues.ToString() << ". sessionActorId=" << SessionActorId << ". 
isRollback=" << (State == EState::ROLLINGBACK)); + + Y_ABORT_UNLESS(!HasError); + HasError = true; + if (State != EState::ROLLINGBACK) { + // Rollback can't finish with error + Send(SessionActorId, new TEvKqpBuffer::TEvError{ + message, + statusCode, + subIssues, + }); + } PassAway(); } @@ -1904,6 +1895,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub THashMap WriteInfos; EState State; + bool HasError = false; THashMap> DataQueues; struct TAckMessage { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index c01458c1187e..35d79c608a56 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -1093,6 +1093,9 @@ class TShardedWriteController : public IShardedWriteController { } void BeforePartitioningChanged() { + if (!Settings.Inconsistent) { + return; + } for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { auto& writeInfo = WriteInfos.at(token); if (writeInfo.Serializer) { @@ -1106,6 +1109,9 @@ class TShardedWriteController : public IShardedWriteController { } void AfterPartitioningChanged() { + if (!Settings.Inconsistent) { + return; + } if (!WriteInfos.empty()) { ShardsInfo.Close(); ReshardData(); @@ -1386,6 +1392,7 @@ class TShardedWriteController : public IShardedWriteController { } void ReshardData() { + YQL_ENSURE(!Settings.Inconsistent); for (auto& [_, shardInfo] : ShardsInfo.GetShards()) { for (size_t index = 0; index < shardInfo.Size(); ++index) { const auto& batch = shardInfo.GetBatch(index); diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 8dd583654856..352f6252d388 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -84,6 +84,7 @@ struct TShardedWriteControllerSettings { i64 MemoryLimitTotal; i64 MemoryLimitPerMessage; i64 MaxBatchesPerMessage; + bool Inconsistent; }; IShardedWriteControllerPtr CreateShardedWriteController( diff --git 
a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 51912e7aee39..66e111e2a32d 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1324,7 +1324,7 @@ class TKqpSessionActor : public TActorBootstrapped { if (Settings.TableService.GetEnableOltpSink() && !txCtx->TxManager) { txCtx->TxManager = CreateKqpTransactionManager(); } - if (Settings.TableService.GetEnableOltpSink() && !txCtx->BufferActorId) { + if (Settings.TableService.GetEnableOltpSink() && !txCtx->BufferActorId && txCtx->HasTableWrite) { TKqpBufferWriterSettings settings { .SessionActorId = SelfId(), .TxManager = txCtx->TxManager, @@ -1598,8 +1598,15 @@ class TKqpSessionActor : public TActorBootstrapped { void Handle(TEvKqpBuffer::TEvError::TPtr& ev) { const auto& msg = *ev->Get(); - TString logMsg = TStringBuilder() << "got TEvKqpBuffer::TEvError in " << CurrentStateFuncName(); - LOG_W(logMsg << ", status: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.StatusCode) << " send to: " << ExecuterId); + TString logMsg = TStringBuilder() << "got TEvKqpBuffer::TEvError in " << CurrentStateFuncName() + << ", status: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.StatusCode) << " send to: " << ExecuterId << " from: " << ev->Sender; + + if (!QueryState || !QueryState->TxCtx || QueryState->TxCtx->BufferActorId != ev->Sender) { + LOG_W(logMsg << ": Old error, current bufferActor=" << QueryState->TxCtx->BufferActorId); + return; + } else { + LOG_W(logMsg); + } TString reason = TStringBuilder() << msg.Message << "; " << msg.SubIssues.ToString(); @@ -2118,13 +2125,14 @@ class TKqpSessionActor : public TActorBootstrapped { if (QueryState->TxCtx) { QueryState->TxCtx->ClearDeferredEffects(); QueryState->TxCtx->Locks.Clear(); - QueryState->TxCtx->Finish(); - QueryState->TxCtx->TxManager.reset(); + if (QueryState->TxCtx->BufferActorId) { Send(QueryState->TxCtx->BufferActorId, new 
TEvKqpBuffer::TEvTerminate{}); QueryState->TxCtx->BufferActorId = {}; } + + QueryState->TxCtx->Finish(); } } From 612f022723e0a5423100274bf14485678db4dd74 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 2 Oct 2024 10:14:07 +0300 Subject: [PATCH 45/69] return --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index cee53deb823c..cc531ff008cf 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -382,7 +382,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { entry.ShowPrivatePath = true; request->ResultSet.emplace_back(entry); - //Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {})); + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {})); Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request)); } From 2b7eb2d69b1e2725860e3c39c65e9597efc39e8f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 2 Oct 2024 12:54:16 +0300 Subject: [PATCH 46/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 5 ++--- ydb/core/kqp/runtime/kqp_write_table.cpp | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index cc531ff008cf..495a1f1425b5 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -1337,9 +1337,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void ProcessWrite() { - // TODO: early flush - //Y_ABORT_UNLESS(GetTotalFreeSpace() <= 0); - const bool needToFlush = /*GetTotalFreeSpace() <= 0*/ false + Y_ABORT_UNLESS(GetTotalFreeSpace() > 0); // TODO: delete + const bool needToFlush = GetTotalFreeSpace() <= 0 || State == EState::FLUSHING || State == EState::PREPARING || State == EState::COMMITTING diff --git 
a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 35d79c608a56..8c3d4a1b7b1e 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -915,7 +915,7 @@ class TShardsInfo { ++BatchesInFlight; } YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).GetMemory() <= maxDataSize); - Y_ABORT_UNLESS(BatchesInFlight == Batches.size()); + Y_ABORT_UNLESS(BatchesInFlight == Batches.size()); // TODO: delete } const TBatchWithMetadata& GetBatch(size_t index) const { From 6839b32fc1cff6fdf193864a99a1ba49ad74bc2a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 2 Oct 2024 13:30:50 +0300 Subject: [PATCH 47/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 66e111e2a32d..e33a2c7ab1fa 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1602,7 +1602,7 @@ class TKqpSessionActor : public TActorBootstrapped { << ", status: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.StatusCode) << " send to: " << ExecuterId << " from: " << ev->Sender; if (!QueryState || !QueryState->TxCtx || QueryState->TxCtx->BufferActorId != ev->Sender) { - LOG_W(logMsg << ": Old error, current bufferActor=" << QueryState->TxCtx->BufferActorId); + //LOG_W(logMsg << ": Old error, current bufferActor=" << QueryState->TxCtx->BufferActorId); return; } else { LOG_W(logMsg); From 524397655d0a146f2560b4cfbe97b5dd015f2588 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 2 Oct 2024 13:31:33 +0300 Subject: [PATCH 48/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 
e33a2c7ab1fa..4cb20e72695b 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1602,7 +1602,7 @@ class TKqpSessionActor : public TActorBootstrapped { << ", status: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.StatusCode) << " send to: " << ExecuterId << " from: " << ev->Sender; if (!QueryState || !QueryState->TxCtx || QueryState->TxCtx->BufferActorId != ev->Sender) { - //LOG_W(logMsg << ": Old error, current bufferActor=" << QueryState->TxCtx->BufferActorId); + LOG_W(logMsg << ": Old error, current bufferActor."); return; } else { LOG_W(logMsg); From 5de7d2bde2ddc45cd8c8f39b4ddc31294c8159bb Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 2 Oct 2024 14:00:49 +0300 Subject: [PATCH 49/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 4cb20e72695b..8889e6601264 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1602,7 +1602,7 @@ class TKqpSessionActor : public TActorBootstrapped { << ", status: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.StatusCode) << " send to: " << ExecuterId << " from: " << ev->Sender; if (!QueryState || !QueryState->TxCtx || QueryState->TxCtx->BufferActorId != ev->Sender) { - LOG_W(logMsg << ": Old error, current bufferActor."); + LOG_E(logMsg << ": Old error."); return; } else { LOG_W(logMsg); From f4940d0a9c25547ca5f3b20bd3d2508352dd6fcc Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 3 Oct 2024 13:23:48 +0300 Subject: [PATCH 50/69] fixes --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 15 +++++++++++--- .../kqp/session_actor/kqp_session_actor.cpp | 1 - ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 20 +++++++++---------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git 
a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 495a1f1425b5..f3342dda984c 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -757,7 +757,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { const bool isPrepare = metadata->IsFinal && Mode == EMode::PREPARE; const bool isImmediateCommit = metadata->IsFinal && Mode == EMode::IMMEDIATE_COMMIT; - Y_ABORT_UNLESS(!metadata->IsFinal || isPrepare || isImmediateCommit); + Y_ABORT_UNLESS(!metadata->IsFinal || isPrepare || isImmediateCommit || Mode == EMode::WRITE); // TODO: delete auto evWrite = std::make_unique(); @@ -1002,6 +1002,9 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu } TMaybe ExtraData() override { + if (!WriteTableActor) { + return {}; + } NKikimrKqp::TEvKqpOutputActorResultInfo resultInfo; for (const auto& lock : WriteTableActor->GetLocks()) { resultInfo.AddLocks()->CopyFrom(lock); @@ -1057,7 +1060,10 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu } void PassAway() override { - WriteTableActor->Terminate(); + if (WriteTableActor) { + WriteTableActor->Terminate(); + //WriteTableActor->PassAway(); + } TActorBootstrapped::PassAway(); } @@ -1547,6 +1553,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub for (auto& [_, info] : WriteInfos) { if (info.WriteTableActor) { info.WriteTableActor->Terminate(); + //info.WriteTableActor->PassAway(); } } TActorBootstrapped::PassAway(); @@ -1828,8 +1835,10 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub CA_LOG_D("OnCommitted: ignored"); return; } + CA_LOG_D("Recv committed"); Y_UNUSED(shardId, dataSize); if (TxManager->ConsumeCommitResult(shardId)) { + CA_LOG_D("Committed"); State = EState::FINISHED; Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); ExecuterActorId = {}; @@ -1844,6 +1853,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void OnFlushed() { + CA_LOG_D("Flushed"); State = 
EState::WRITING; Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); ExecuterActorId = {}; @@ -2037,7 +2047,6 @@ class TKqpForwardWriteActor : public TActorBootstrapped, if (!Data) { Data = std::make_shared>(); } - Data->emplace_back(std::move(data)); DataSize += size; diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index 8889e6601264..bef35e5c8683 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1241,7 +1241,6 @@ class TKqpSessionActor : public TActorBootstrapped { request.LocksOp = ELocksOp::Commit; } } else { - const bool hasLocks = txCtx.TxManager ? !txCtx.TxManager->HasLocks() : txCtx.Locks.HasLocks(); if (hasLocks || txCtx.TopicOperations.HasOperations()) { if (!txCtx.GetSnapshot().IsValid() || txCtx.TxHasEffects() || txCtx.TopicOperations.HasOperations()) { LOG_D("TExecPhysicalRequest, tx has commit locks"); diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index efee6ba58a12..6fd10d68f115 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -3746,8 +3746,8 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { virtual void DoExecute() = 0; public: void Execute() { - AppConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); - AppConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); + AppConfig.MutableTableServiceConfig()->SetEnableOlapSink(IsOlap); + AppConfig.MutableTableServiceConfig()->SetEnableOltpSink(!IsOlap); AppConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); auto settings = TKikimrSettings().SetAppConfig(AppConfig).SetWithSampleTables(false); @@ -3984,14 +3984,14 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { INSERT INTO `/Root/DataShard` (Col1, Col2) VALUES (0u, 0); INSERT INTO `/Root/DataShard` (Col1, Col3) VALUES (1u, 'test'); INSERT INTO `/Root/DataShard` (Col1, Col3, Col2) 
VALUES (2u, 't', 3); - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } { auto it = client.StreamExecuteQuery(R"( SELECT * FROM `/Root/DataShard` ORDER BY Col1; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson(output, R"([[0u;[0];#];[1u;#;["test"]];[2u;[3];["t"]]])"); @@ -4000,21 +4000,21 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { { auto it = client.ExecuteQuery(R"( UPDATE `/Root/DataShard` SET Col2 = 42 WHERE Col3 == 'not found'; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } { auto it = client.ExecuteQuery(R"( UPDATE `/Root/DataShard` SET Col2 = 42 WHERE Col3 == 't'; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } { auto it = client.StreamExecuteQuery(R"( SELECT * FROM `/Root/DataShard` ORDER BY Col1; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, 
it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson(output, R"([[0u;[0];#];[1u;#;["test"]];[2u;[42];["t"]]])"); @@ -4023,20 +4023,20 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { { auto it = client.ExecuteQuery(R"( UPDATE `/Root/DataShard` ON SELECT 0u AS Col1, 1 AS Col2, 'text' AS Col3; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } { auto it = client.ExecuteQuery(R"( UPDATE `/Root/DataShard` ON SELECT 10u AS Col1, 1 AS Col2, 'text' AS Col3; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } auto it = client.StreamExecuteQuery(R"( SELECT * FROM `/Root/DataShard` ORDER BY Col1; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); TString output = StreamResultToYson(it); CompareYson(output, R"([[0u;[1];["text"]];[1u;#;["test"]];[2u;[42];["t"]]])"); From abab17ee305b93a282791df6f13cc3e592139323 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 3 Oct 2024 15:15:42 +0300 Subject: [PATCH 51/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index f3342dda984c..0720d391dcd6 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -1161,6 +1161,13 @@ 
struct TEvBufferWrite : public TEventLocal Settings; std::shared_ptr> Data; std::shared_ptr Alloc; + + ~TEvBufferWrite() { + if (Alloc) { + TGuard guard(*Alloc); + Data = nullptr; + } + } }; struct TEvBufferWriteResult : public TEventLocal { @@ -1270,6 +1277,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub message.Close = ev->Get()->Close; message.Data = ev->Get()->Data; message.Alloc = ev->Get()->Alloc; + + ev->Get()->Data = nullptr; + ev->Get()->Alloc = nullptr; Process(); } From eb1f33343a31bcf5a8d3b1643164ae750987618f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 4 Oct 2024 17:16:07 +0300 Subject: [PATCH 52/69] fix --- ydb/core/kqp/session_actor/kqp_query_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index b0b3378328c0..ee9af364fa0a 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -419,7 +419,7 @@ class TKqpQueryState : public TNonCopyable { if (TxCtx->CanDeferEffects()) { // At current time sinks require separate tnx with commit. 
- while (tx && tx->GetHasEffects() && TxCtx->HasOlapTable) { + while (tx && tx->GetHasEffects()/* && !TxCtx->HasOlapTable*/) { QueryData->CreateKqpValueMap(tx); bool success = TxCtx->AddDeferredEffect(tx, QueryData); YQL_ENSURE(success); From baa5ff0a62299289c3c7fa60507e6659a10ba07e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 6 Oct 2024 09:40:20 +0300 Subject: [PATCH 53/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 4 +--- ydb/core/kqp/runtime/kqp_write_table.cpp | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 0720d391dcd6..19dc69067707 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -185,9 +185,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { TShardedWriteControllerSettings { .MemoryLimitTotal = MessageSettings.InFlightMemoryLimitPerActorBytes, .MemoryLimitPerMessage = MessageSettings.MemoryLimitPerMessageBytes, - .MaxBatchesPerMessage = (SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable - ? 
1 - : MessageSettings.MaxBatchesPerMessage), + .MaxBatchesPerMessage = MessageSettings.MaxBatchesPerMessage, }, TypeEnv, Alloc); diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 8c3d4a1b7b1e..ded816816457 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -914,7 +914,7 @@ class TShardsInfo { dataSize += GetBatch(BatchesInFlight).GetMemory(); ++BatchesInFlight; } - YQL_ENSURE(BatchesInFlight == Batches.size() || GetBatch(BatchesInFlight).GetMemory() <= maxDataSize); + YQL_ENSURE(BatchesInFlight == Batches.size() || dataSize + GetBatch(BatchesInFlight).GetMemory() > maxDataSize || BatchesInFlight >= maxCount); Y_ABORT_UNLESS(BatchesInFlight == Batches.size()); // TODO: delete } From 22635528274aa98d5da90fc4220a2207c6776d0c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 7 Oct 2024 12:04:45 +0300 Subject: [PATCH 54/69] remove build locks --- ydb/core/kqp/executer_actor/kqp_data_executer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 184fac4b8091..2d150db60db6 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -334,7 +334,9 @@ class TKqpDataExecuter : public TKqpExecuterBaseLockHandle = std::move(LockHandle); } - BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks); + if (!TxManager) { + BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks); + } } auto resultSize = ResponseEv->GetByteSize(); From 1ccdaa1f355701831bfec45e327c5c28ff82e5c9 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 8 Oct 2024 15:19:24 +0300 Subject: [PATCH 55/69] flush --- .../kqp/executer_actor/kqp_data_executer.cpp | 7 +-- ydb/core/kqp/runtime/kqp_write_actor.cpp | 15 +++--- 
ydb/core/kqp/session_actor/kqp_query_state.h | 46 ++----------------- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 21 ++++++--- 4 files changed, 32 insertions(+), 57 deletions(-) diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 2d150db60db6..8f2d3685db21 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -305,11 +305,12 @@ class TKqpDataExecuter : public TKqpExecuterBase(); - event->ExecuterActorId = SelfId(); + //auto event = std::make_unique(); + //event->ExecuterActorId = SelfId(); Become(&TKqpDataExecuter::FinalizeState); LOG_D("SEND BUFFER FLUSH " << BufferActorId); - Send(BufferActorId, event.release()); + //Send(BufferActorId, event.release()); + MakeResponseAndPassAway(); return; } } diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 19dc69067707..390cc8c458e2 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -169,7 +169,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { const bool inconsistentTx, const NMiniKQL::TTypeEnvironment& typeEnv, std::shared_ptr alloc, - const IKqpTransactionManagerPtr& txManager) + const IKqpTransactionManagerPtr& txManager, + const TActorId sessionActorId) : TypeEnv(typeEnv) , Alloc(alloc) , TableId(tableId) @@ -180,6 +181,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { , Callbacks(callbacks) , TxManager(txManager ? 
txManager : CreateKqpTransactionManager(/* collectOnly= */ true)) { + LogPrefix = TStringBuilder() << "SessionActorId: " << sessionActorId; try { ShardedWriteController = CreateShardedWriteController( TShardedWriteControllerSettings { @@ -955,7 +957,8 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu Settings.GetInconsistentTx(), TypeEnv, Alloc, - nullptr); + nullptr, + TActorId{}); WriteTableActorId = RegisterWithSameMailbox(WriteTableActor); @@ -1202,7 +1205,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Bootstrap() { - LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; + LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", SessionActorId: " << SessionActorId << ", " << LogPrefix; Become(&TKqpBufferWriteActor::StateWrite); } @@ -1255,7 +1258,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub InconsistentTx, TypeEnv, Alloc, - TxManager); + TxManager, + SessionActorId); writeInfo.WriteTableActorId = RegisterWithSameMailbox(writeInfo.WriteTableActor); CA_LOG_D("Create new TableWriteActor for table `" << settings.TablePath << "` (" << settings.TableId << "). lockId=" << LockTxId << " " << writeInfo.WriteTableActorId); } @@ -1359,10 +1363,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub || State == EState::ROLLINGBACK; if (needToFlush) { + CA_LOG_D("DO FLUSH"); for (auto& [_, info] : WriteInfos) { - CA_LOG_D("FLUSH TEST"); if (info.WriteTableActor->IsReady()) { - CA_LOG_D("FLUSH READY"); info.WriteTableActor->Flush(); } } diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index ee9af364fa0a..a36d1ecc72ad 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -352,11 +352,6 @@ class TKqpQueryState : public TNonCopyable { return false; } - if (TxCtx->HasOlapTable) { - // HTAP/OLAP transactions always use separate commit. 
- return false; - } - if (TxCtx->HasUncommittedChangesRead || AppData()->FeatureFlags.GetEnableForceImmediateEffectsExecution()) { if (tx && tx->GetHasEffects()) { YQL_ENSURE(tx->ResultsSize() == 0); @@ -417,9 +412,12 @@ class TKqpQueryState : public TNonCopyable { const auto& phyQuery = PreparedQuery->GetPhysicalQuery(); auto tx = PreparedQuery->GetPhyTxOrEmpty(CurrentTx); - if (TxCtx->CanDeferEffects()) { + //Cerr << ">>> CURRENT " << CurrentTx << Endl; + + if (TxCtx->CanDeferEffects()) { + //Cerr << ">>> DEFER " << (tx != nullptr) << " " << (tx && tx->GetHasEffects()) << " " << !TxCtx->HasOlapTable << Endl; // At current time sinks require separate tnx with commit. - while (tx && tx->GetHasEffects()/* && !TxCtx->HasOlapTable*/) { + while (tx && tx->GetHasEffects() && !TxCtx->HasOlapTable) { QueryData->CreateKqpValueMap(tx); bool success = TxCtx->AddDeferredEffect(tx, QueryData); YQL_ENSURE(success); @@ -436,40 +434,6 @@ class TKqpQueryState : public TNonCopyable { return tx; } - /*bool HasOnlyInonsistentSinkInStage(const ::NKqpProto::TKqpPhyStage& stage) const { - for (const auto& sink : stage.GetSinks()) { - if (sink.GetTypeCase() == NKqpProto::TKqpSink::kInternalSink && sink.GetInternalSink().GetSettings().Is()) { - NKikimrKqp::TKqpTableSinkSettings settings; - YQL_ENSURE(sink.GetInternalSink().GetSettings().UnpackTo(&settings), "Failed to unpack settings"); - if (!settings.GetInconsistentTx()) { - return true; - } - } - } - return false; - } - - bool HasTxSink() const { - const auto& query = PreparedQuery->GetPhysicalQuery(); - for (auto& tx : query.GetTransactions()) { - for (const auto& stage : tx.GetStages()) { - if (HasTxSinkInStage(stage)) { - return true; - } - } - } - return false; - } - - bool HasTxSinkInTx(const TKqpPhyTxHolder::TConstPtr& tx) const { - for (const auto& stage : tx->GetStages()) { - if (HasTxSinkInStage(stage)) { - return true; - } - } - return false; - }*/ - bool HasTxControl() const { return RequestEv->HasTxControl(); } diff --git 
a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index 6fd10d68f115..7594e72f913a 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -4301,7 +4301,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { .ExtractValueSync() .GetTransaction(); UNIT_ASSERT(tx.IsActive()); - { + /*{ auto prepareResult = session2.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard` (Col1, Col2, Col3) VALUES (10u, "test1", 10), (20u, "test2", 11), (2147483647u, "test3", 12), (2147483640u, NULL, 13); @@ -4326,6 +4326,13 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { CompareYson( output, R"([[4u]])"); + }*/ + + { + auto prepareResult = session2.ExecuteQuery(R"( + SELECT * FROM `/Root/DataShard2`; + )", TTxControl::Tx(tx.GetId()), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); } { @@ -4341,12 +4348,12 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); } - { - auto prepareResult = client.ExecuteQuery(R"( - REPLACE INTO `/Root/DataShard2` SELECT * FROM `/Root/DataShard`; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); - UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); - } + //{ + // auto prepareResult = client.ExecuteQuery(R"( + // REPLACE INTO `/Root/DataShard2` SELECT * FROM `/Root/DataShard`; + // )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + // UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + //} } Y_UNIT_TEST(ReadDatashardAndColumnshard) { From beab1aca51a7108aa7973e4e884caabc19759b77 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev 
Date: Wed, 9 Oct 2024 13:36:11 +0300 Subject: [PATCH 56/69] use-immediate-effects --- .../kqp/executer_actor/kqp_data_executer.cpp | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 8f2d3685db21..2efaa5cc6bbb 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -286,30 +286,38 @@ class TKqpDataExecuter : public TKqpExecuterBase(); event->ExecuterActorId = SelfId(); event->TxId = TxId; - LOG_D("SEND BUFFER COMMIT " << BufferActorId); - Become(&TKqpDataExecuter::FinalizeState); Send(BufferActorId, event.release()); return; } else if (Request.LocksOp == ELocksOp::Rollback) { - auto event = std::make_unique(); - event->ExecuterActorId = SelfId(); Become(&TKqpDataExecuter::FinalizeState); LOG_D("SEND BUFFER ROLLBACK " << BufferActorId); + + auto event = std::make_unique(); + event->ExecuterActorId = SelfId(); Send(BufferActorId, event.release()); MakeResponseAndPassAway(); return; - } else { - //auto event = std::make_unique(); - //event->ExecuterActorId = SelfId(); + } else if (Request.UseImmediateEffects) { Become(&TKqpDataExecuter::FinalizeState); LOG_D("SEND BUFFER FLUSH " << BufferActorId); - //Send(BufferActorId, event.release()); + + auto event = std::make_unique(); + event->ExecuterActorId = SelfId(); + Send(BufferActorId, event.release()); + return; + } else { + Become(&TKqpDataExecuter::FinalizeState); + LOG_D("SEND BUFFER SKIP FLUSH " << BufferActorId); MakeResponseAndPassAway(); return; } From e1456856828eae084a49105182a7578a8a075743 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 13 Oct 2024 21:42:54 +0300 Subject: [PATCH 57/69] fix --- ydb/core/kqp/session_actor/kqp_session_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp 
b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index bef35e5c8683..624aab1e5adf 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1260,7 +1260,7 @@ class TKqpSessionActor : public TActorBootstrapped { } else if (QueryState->ShouldAcquireLocks(tx) && (!txCtx.HasOlapTable || Settings.TableService.GetEnableOlapSink())) { request.AcquireLocksTxId = txCtx.Locks.GetLockTxId(); - if (txCtx.HasUncommittedChangesRead || Config->FeatureFlags.GetEnableForceImmediateEffectsExecution() || txCtx.HasOlapTable) { + if (!txCtx.CanDeferEffects()) { request.UseImmediateEffects = true; } } From f736fdf9c47b8aef11e0a4cf985750ebfd090f72 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 17 Oct 2024 17:00:45 +0300 Subject: [PATCH 58/69] logs --- ydb/core/kqp/executer_actor/kqp_data_executer.cpp | 6 ++++-- ydb/core/kqp/runtime/kqp_write_actor.cpp | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 2efaa5cc6bbb..ae25bd93246b 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -238,6 +238,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseAdd(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); if (TxManager) { + LOG_D("TEST::ADDDSHARD>> R " << lock.GetDataShard() << " " << stageInfo.Meta.TablePath << " " << TxManager->GetShardsCount()); TxManager->AddShard(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::READ); TxManager->AddLock(lock.GetDataShard(), lock); @@ -1088,7 +1089,7 @@ class TKqpDataExecuter : public TKqpExecuterBaseAsActorContext()); - LOG_T("Execute planned transaction, coordinator: " << TxCoordinator); + LOG_D("Execute planned transaction, 
coordinator: " << TxCoordinator << " for " << affectedSet.size() << "shards"); Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvForward(ev.Release(), TxCoordinator, /* subscribe */ true)); } @@ -1738,7 +1739,8 @@ class TKqpDataExecuter : public TKqpExecuterBase ev; if (isOlap) { diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 390cc8c458e2..c203c7bbd760 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -294,6 +294,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { void UpdateShards() { // TODO: Maybe there are better ways to initialize new shards... for (const auto& shardInfo : ShardedWriteController->GetPendingShards()) { + CA_LOG_D("TEST::ADDDSHARD>> W " << shardInfo.ShardId << " " << TablePath << " " << TxManager->GetShardsCount()); TxManager->AddShard(shardInfo.ShardId, IsOlap(), TablePath); TxManager->AddAction(shardInfo.ShardId, IKqpTransactionManager::EAction::WRITE); if (shardInfo.HasRead) { @@ -1502,11 +1503,12 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub for (const auto& shardInfo : commitInfo.ShardsInfo) { auto& item = *affectedSet.Add(); item.SetTabletId(shardInfo.ShardId); + Y_ABORT_UNLESS(shardInfo.AffectedFlags != 0); item.SetFlags(shardInfo.AffectedFlags); } //TODO: NDataIntegrity - CA_LOG_D("Execute planned transaction, coordinator: " << commitInfo.Coordinator << " volitale=" << ((transaction.GetFlags() & TEvTxProxy::TEvProposeTransaction::FlagVolatile) != 0)); + CA_LOG_D("Execute planned transaction, coordinator: " << commitInfo.Coordinator << " volitale=" << ((transaction.GetFlags() & TEvTxProxy::TEvProposeTransaction::FlagVolatile) != 0) << " shards=" << affectedSet.size()); Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvForward(ev.Release(), commitInfo.Coordinator, /* subscribe */ true)); } From cfdf04f86eb446f6d305d264f591949e6e45c271 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 18 
Oct 2024 00:45:13 +0300 Subject: [PATCH 59/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 2c36da0a0faf..d108a50e6709 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -361,7 +361,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void ResolveTable() { - Counters->WriteActorsShardResolve->Inc(); SchemeEntry.reset(); SchemeRequest.reset(); @@ -385,11 +384,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { entry.ShowPrivatePath = true; request->ResultSet.emplace_back(entry); - WriteActorStateSpan = NWilson::TSpan(TWilsonKqp::WriteActorTableNavigate, WriteActorSpan.GetTraceId(), - "WaitForShardsResolve", NWilson::EFlags::AUTO_END); - - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {}), 0, 0, WriteActorSpan.GetTraceId()); - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request), 0, 0, WriteActorSpan.GetTraceId()); + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {}), 0, 0); + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request)), 0, 0); } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { @@ -448,7 +444,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { request->ResultSet.emplace_back(std::move(keyRange)); TAutoPtr resolveReq(new TEvTxProxySchemeCache::TEvResolveKeySet(request)); - Send(MakeSchemeCacheID(), resolveReq.Release(), 0, 0, WriteActorSpan.GetTraceId()); + Send(MakeSchemeCacheID(), resolveReq.Release()); } void Handle(TEvTxProxySchemeCache::TEvResolveKeySetResult::TPtr& ev) { @@ -1065,13 +1061,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu NYql::TIssues issues; issues.AddIssue(std::move(issue)); - if (WriteActorStateSpan) { - 
WriteActorStateSpan.EndError(issues.ToOneLineString()); - } - if (WriteActorSpan) { - WriteActorSpan.EndError(issues.ToOneLineString()); - } - Callbacks->OnAsyncOutputError(OutputIndex, std::move(issues), statusCode); } From 4ab75dfacc0d710397a6c5ddf612896b322ec08a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 18 Oct 2024 19:16:23 +0300 Subject: [PATCH 60/69] fix --- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index d108a50e6709..83c75d19897c 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -385,7 +385,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { request->ResultSet.emplace_back(entry); Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {}), 0, 0); - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request)), 0, 0); + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request), 0, 0); } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { From 9fed58aa8d88eba23c551bc74fa3cdc17855fcfd Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 24 Oct 2024 18:03:37 +0300 Subject: [PATCH 61/69] fix --- ydb/core/kqp/expr_nodes/kqp_expr_nodes.json | 3 +- ydb/core/kqp/opt/kqp_opt_build_txs.cpp | 54 +++++++++++++-------- ydb/core/kqp/opt/kqp_opt_effects.cpp | 24 +++++---- ydb/core/kqp/runtime/kqp_write_actor.cpp | 20 ++++++-- ydb/core/kqp/runtime/kqp_write_table.cpp | 45 +++++++++++++++-- ydb/core/kqp/runtime/kqp_write_table.h | 7 ++- ydb/core/protos/kqp.proto | 1 + 7 files changed, 114 insertions(+), 40 deletions(-) diff --git a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json index 63ec4a7dc929..be84a22fc5d5 100644 --- a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json +++ b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json @@ 
-548,7 +548,8 @@ {"Index": 0, "Name": "Table", "Type": "TKqpTable"}, {"Index": 1, "Name": "InconsistentWrite", "Type": "TCoAtom"}, {"Index": 2, "Name": "Mode", "Type": "TCoAtom"}, - {"Index": 3, "Name": "Settings", "Type": "TCoNameValueTupleList", "Optional": true} + {"Index": 3, "Name": "Priority", "Type": "TCoAtom"}, + {"Index": 4, "Name": "Settings", "Type": "TCoNameValueTupleList", "Optional": true} ] }, { diff --git a/ydb/core/kqp/opt/kqp_opt_build_txs.cpp b/ydb/core/kqp/opt/kqp_opt_build_txs.cpp index b57a34791ef1..fecacf7ac09d 100644 --- a/ydb/core/kqp/opt/kqp_opt_build_txs.cpp +++ b/ydb/core/kqp/opt/kqp_opt_build_txs.cpp @@ -560,7 +560,7 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { } if (!query.Effects().Empty()) { - auto collectedEffects = CollectEffects(query.Effects(), ctx); + auto collectedEffects = CollectEffects(query.Effects(), ctx, *KqpCtx); for (auto& effects : collectedEffects) { auto tx = BuildTx(effects.Ptr(), ctx, /* isPrecompute */ false); @@ -585,13 +585,12 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { } private: - TVector CollectEffects(const TExprList& list, TExprContext& ctx) { - return {list}; - + TVector CollectEffects(const TExprList& list, TExprContext& ctx, TKqpOptimizeContext& kqpCtx) { struct TEffectsInfo { enum class EType { KQP_EFFECT, KQP_SINK, + KQP_BATCH_SINK, EXTERNAL_SINK, }; @@ -619,23 +618,38 @@ class TKqpBuildTxsTransformer : public TSyncTransformerBase { effectsInfos.back().Type = TEffectsInfo::EType::EXTERNAL_SINK; effectsInfos.back().Exprs.push_back(expr.Ptr()); } else { - // Two table sinks can't be executed in one physical transaction if they write into one table. 
- const TStringBuf tablePathId = sinkSettings.Cast().Table().PathId().Value(); - - auto it = std::find_if( - std::begin(effectsInfos), - std::end(effectsInfos), - [&tablePathId](const auto& effectsInfo) { - return effectsInfo.Type == TEffectsInfo::EType::KQP_SINK - && !effectsInfo.TablesPathIds.contains(tablePathId); - }); - if (it == std::end(effectsInfos)) { - effectsInfos.emplace_back(); - it = std::prev(std::end(effectsInfos)); - it->Type = TEffectsInfo::EType::KQP_SINK; + // Two table sinks can't be executed in one physical transaction if they write into same table and have same priority. + + const auto& tableDescription = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, sinkSettings.Cast().Table().Path()); + if (tableDescription.Metadata->Kind == EKikimrTableKind::Olap) { + const TStringBuf tablePathId = sinkSettings.Cast().Table().PathId().Value(); + + auto it = std::find_if( + std::begin(effectsInfos), + std::end(effectsInfos), + [&tablePathId](const auto& effectsInfo) { + return effectsInfo.Type == TEffectsInfo::EType::KQP_SINK + && !effectsInfo.TablesPathIds.contains(tablePathId); + }); + if (it == std::end(effectsInfos)) { + effectsInfos.emplace_back(); + it = std::prev(std::end(effectsInfos)); + it->Type = TEffectsInfo::EType::KQP_SINK; + } + it->TablesPathIds.insert(tablePathId); + it->Exprs.push_back(expr.Ptr()); + } else { + auto it = std::find_if( + std::begin(effectsInfos), + std::end(effectsInfos), + [](const auto& effectsInfo) { return effectsInfo.Type == TEffectsInfo::EType::KQP_BATCH_SINK; }); + if (it == std::end(effectsInfos)) { + effectsInfos.emplace_back(); + it = std::prev(std::end(effectsInfos)); + it->Type = TEffectsInfo::EType::KQP_BATCH_SINK; + } + it->Exprs.push_back(expr.Ptr()); } - it->TablesPathIds.insert(tablePathId); - it->Exprs.push_back(expr.Ptr()); } } else { // Table effects are executed all in one physical transaction. 
diff --git a/ydb/core/kqp/opt/kqp_opt_effects.cpp b/ydb/core/kqp/opt/kqp_opt_effects.cpp index 0c0e818d6853..250e32e6b26d 100644 --- a/ydb/core/kqp/opt/kqp_opt_effects.cpp +++ b/ydb/core/kqp/opt/kqp_opt_effects.cpp @@ -232,7 +232,7 @@ TCoAtomList BuildKeyColumnsList(const TKikimrTableDescription& table, TPositionH } TDqStage RebuildPureStageWithSink(TExprBase expr, const TKqpTable& table, - const bool allowInconsistentWrites, const TStringBuf mode, TExprContext& ctx) { + const bool allowInconsistentWrites, const TStringBuf mode, const i64 order, TExprContext& ctx) { Y_DEBUG_ABORT_UNLESS(IsDqPureExpr(expr)); return Build(ctx, expr.Pos()) @@ -257,6 +257,7 @@ TDqStage RebuildPureStageWithSink(TExprBase expr, const TKqpTable& table, ? ctx.NewAtom(expr.Pos(), "true") : ctx.NewAtom(expr.Pos(), "false")) .Mode(ctx.NewAtom(expr.Pos(), mode)) + .Priority(ctx.NewAtom(expr.Pos(), ToString(order))) .Settings() .Build() .Build() @@ -296,7 +297,7 @@ TDqPhyPrecompute BuildPrecomputeStage(TExprBase expr, TExprContext& ctx) { } bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, - const TCoArgument& inputArg, TMaybeNode& stageInput, TMaybeNode& effect, bool& sinkEffect) + const TCoArgument& inputArg, TMaybeNode& stageInput, TMaybeNode& effect, bool& sinkEffect, const i64 order) { const auto& table = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, node.Table().Path()); @@ -306,12 +307,13 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const } sinkEffect = NeedSinks(table, kqpCtx) || (kqpCtx.IsGenericQuery() && settings.AllowInconsistentWrites); + const i64 priority = (table.Metadata->Kind == EKikimrTableKind::Olap) ? 
0 : order; if (IsDqPureExpr(node.Input())) { if (sinkEffect) { stageInput = RebuildPureStageWithSink( node.Input(), node.Table(), - settings.AllowInconsistentWrites, settings.Mode, ctx); + settings.AllowInconsistentWrites, settings.Mode, priority, ctx); effect = Build(ctx, node.Pos()) .Stage(stageInput.Cast().Ptr()) .SinkIndex().Build("0") @@ -352,6 +354,7 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const ? ctx.NewAtom(node.Pos(), "true") : ctx.NewAtom(node.Pos(), "false")) .Mode(ctx.NewAtom(node.Pos(), settings.Mode)) + .Priority(ctx.NewAtom(node.Pos(), ToString(priority))) .Settings() .Build() .Build() @@ -448,16 +451,16 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const } bool BuildDeleteRowsEffect(const TKqlDeleteRows& node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, - const TCoArgument& inputArg, TMaybeNode& stageInput, TMaybeNode& effect, bool& sinkEffect) + const TCoArgument& inputArg, TMaybeNode& stageInput, TMaybeNode& effect, bool& sinkEffect, const i64 order) { const auto& table = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, node.Table().Path()); sinkEffect = NeedSinks(table, kqpCtx); - + const i64 priority = (table.Metadata->Kind == EKikimrTableKind::Olap) ? 
0 : order; if (IsDqPureExpr(node.Input())) { if (sinkEffect) { const auto keyColumns = BuildKeyColumnsList(table, node.Pos(), ctx); - stageInput = RebuildPureStageWithSink(node.Input(), node.Table(), false, "delete", ctx); + stageInput = RebuildPureStageWithSink(node.Input(), node.Table(), false, "delete", priority, ctx); effect = Build(ctx, node.Pos()) .Stage(stageInput.Cast().Ptr()) .SinkIndex().Build("0") @@ -494,6 +497,7 @@ bool BuildDeleteRowsEffect(const TKqlDeleteRows& node, TExprContext& ctx, const .Table(node.Table()) .InconsistentWrite(ctx.NewAtom(node.Pos(), "false")) .Mode(ctx.NewAtom(node.Pos(), "delete")) + .Priority(ctx.NewAtom(node.Pos(), ToString(priority))) .Settings() .Build() .Build() @@ -584,6 +588,7 @@ bool BuildEffects(TPositionHandle pos, const TVector& effects, TVector newSinkEffects; newEffects.reserve(effects.size()); newSinkEffects.reserve(effects.size()); + i64 order = builtEffects.size(); for (const auto& effect : effects) { TMaybeNode newEffect; @@ -596,15 +601,17 @@ bool BuildEffects(TPositionHandle pos, const TVector& effects, .Done(); if (auto maybeUpsertRows = effect.Maybe()) { - if (!BuildUpsertRowsEffect(maybeUpsertRows.Cast(), ctx, kqpCtx, inputArg, input, newEffect, sinkEffect)) { + if (!BuildUpsertRowsEffect(maybeUpsertRows.Cast(), ctx, kqpCtx, inputArg, input, newEffect, sinkEffect, order)) { return false; } + ++order; } if (auto maybeDeleteRows = effect.Maybe()) { - if (!BuildDeleteRowsEffect(maybeDeleteRows.Cast(), ctx, kqpCtx, inputArg, input, newEffect, sinkEffect)) { + if (!BuildDeleteRowsEffect(maybeDeleteRows.Cast(), ctx, kqpCtx, inputArg, input, newEffect, sinkEffect, order)) { return false; } + ++order; } if (input) { @@ -696,7 +703,6 @@ TMaybeNode BuildEffects(const TKqlQuery& query, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { TVector builtEffects; - if constexpr (GroupEffectsByTable) { TMap> tableEffectsMap; ExploreEffectLists( diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp 
b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 83c75d19897c..2881d7b9e4f1 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -245,12 +245,14 @@ class TKqpTableWriteActor : public TActorBootstrapped { TWriteToken Open( NKikimrDataEvents::TEvWrite::TOperation::EOperationType operationType, - TVector&& columnsMetadata) { + TVector&& columnsMetadata, + i64 priority) { YQL_ENSURE(!Closed); auto token = ShardedWriteController->Open( TableId, operationType, - std::move(columnsMetadata)); + std::move(columnsMetadata), + priority); CA_LOG_D("Open write to " << TableId << " token=" << token); return token; } @@ -733,6 +735,10 @@ class TKqpTableWriteActor : public TActorBootstrapped { } } + void FlushBuffers() { + ShardedWriteController->FlushBuffers(); + } + void Flush() { for (const auto& shardInfo : ShardedWriteController->GetPendingShards()) { SendDataToShard(shardInfo.ShardId); @@ -971,7 +977,8 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu for (const auto & column : Settings.GetColumns()) { columnsMetadata.push_back(column); } - WriteToken = WriteTableActor->Open(GetOperation(Settings.GetType()), std::move(columnsMetadata)); + YQL_ENSURE(Settings.GetPriority() == 0); + WriteToken = WriteTableActor->Open(GetOperation(Settings.GetType()), std::move(columnsMetadata), Settings.GetPriority()); WaitingForTableActor = true; CA_LOG_D("New TKqpDirectWriteActor for table `" << Settings.GetTable().GetPath() << "` (" << TableId << ")."); @@ -1149,6 +1156,7 @@ struct TWriteSettings { NKikimrDataEvents::TEvWrite::TOperation::EOperationType OperationType; TVector Columns; TTransactionSettings TransactionSettings; + i64 Priority; }; struct TBufferWriteMessage { @@ -1268,7 +1276,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub CA_LOG_D("Create new TableWriteActor for table `" << settings.TablePath << "` (" << settings.TableId << "). 
lockId=" << LockTxId << " " << writeInfo.WriteTableActorId); } - auto cookie = writeInfo.WriteTableActor->Open(settings.OperationType, std::move(settings.Columns)); + auto cookie = writeInfo.WriteTableActor->Open(settings.OperationType, std::move(settings.Columns), settings.Priority); token = TWriteToken{settings.TableId, cookie}; } else { token = *ev->Get()->Token; @@ -1383,6 +1391,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub for (auto& [_, queue] : DataQueues) { Y_ABORT_UNLESS(queue.empty()); } + for (auto& [_, info] : WriteInfos) { + info.WriteTableActor->FlushBuffers(); + } Process(); } @@ -2032,6 +2043,7 @@ class TKqpForwardWriteActor : public TActorBootstrapped, .LockNodeId = Settings.GetLockNodeId(), .InconsistentTx = Settings.GetInconsistentTx(), }, + .Priority = Settings.GetPriority(), }; } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index b697ae2f4561..0792ccb3ba8e 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -861,6 +861,7 @@ struct TMetadata { const TTableId TableId; const NKikimrDataEvents::TEvWrite::TOperation::EOperationType OperationType; const TVector InputColumnsMetadata; + const i64 Priority; }; struct TBatchWithMetadata { @@ -1132,7 +1133,8 @@ class TShardedWriteController : public IShardedWriteController { TWriteToken Open( const TTableId tableId, const NKikimrDataEvents::TEvWrite::TOperation::EOperationType operationType, - TVector&& inputColumns) override { + TVector&& inputColumns, + const i64 priority) override { auto token = CurrentWriteToken++; auto iter = WriteInfos.emplace( token, @@ -1141,6 +1143,7 @@ class TShardedWriteController : public IShardedWriteController { .TableId = tableId, .OperationType = operationType, .InputColumnsMetadata = std::move(inputColumns), + .Priority = priority, }, .Serializer = nullptr, .Closed = false, @@ -1167,7 +1170,11 @@ class TShardedWriteController : public 
IShardedWriteController { YQL_ENSURE(info.Serializer); info.Serializer->AddData(data); - FlushSerializer(token, GetMemory() >= Settings.MemoryLimitTotal); + if (info.Metadata.Priority == 0) { + FlushSerializer(token, GetMemory() >= Settings.MemoryLimitTotal); + } else { + YQL_ENSURE(GetMemory() <= Settings.MemoryLimitTotal); + } } void Close(TWriteToken token) override { @@ -1176,8 +1183,38 @@ class TShardedWriteController : public IShardedWriteController { YQL_ENSURE(info.Serializer); info.Closed = true; info.Serializer->Close(); - FlushSerializer(token, true); - YQL_ENSURE(info.Serializer->IsFinished()); + if (info.Metadata.Priority == 0) { + FlushSerializer(token, true); + YQL_ENSURE(info.Serializer->IsFinished()); + } + } + + void FlushBuffers() override { + TVector writeTokensFoFlush; + for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { + const auto& writeInfo = WriteInfos.at(token); + YQL_ENSURE(writeInfo.Closed); + if (writeInfo.Metadata.Priority != 0) { + YQL_ENSURE(!writeInfo.Serializer->IsFinished()); + writeTokensFoFlush.push_back(token); + } else { + YQL_ENSURE(writeInfo.Serializer->IsFinished()); + } + } + + std::sort( + std::begin(writeTokensFoFlush), + std::end(writeTokensFoFlush), + [&](const TWriteToken& lhs, const TWriteToken& rhs) { + const auto& leftWriteInfo = WriteInfos.at(lhs); + const auto& rightWriteInfo = WriteInfos.at(rhs); + return leftWriteInfo.Metadata.Priority < rightWriteInfo.Metadata.Priority; + }); + + for (const TWriteToken token : writeTokensFoFlush) { + FlushSerializer(token, true); + YQL_ENSURE(WriteInfos.at(token).Serializer->IsFinished()); + } } void Close() override { diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 352f6252d388..e3e645541310 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -22,15 +22,18 @@ class IShardedWriteController : public TThrRefBase { // Data ordering invariant: // For two writes A and 
B: - // A happend before B <=> Close(A) happend before Open(B). + // A happend before B <=> Close(A) happend before Open(B) otherwise Priority(A) < Priority(B). virtual TWriteToken Open( const TTableId TableId, const NKikimrDataEvents::TEvWrite::TOperation::EOperationType operationType, - TVector&& inputColumns) = 0; + TVector&& inputColumns, + const i64 priority) = 0; virtual void Write(TWriteToken token, const NMiniKQL::TUnboxedValueBatch& data) = 0; virtual void Close(TWriteToken token) = 0; + virtual void FlushBuffers() = 0; + virtual void Close() = 0; virtual void AddCoveringMessages() = 0; diff --git a/ydb/core/protos/kqp.proto b/ydb/core/protos/kqp.proto index 6bbfe534278e..990f74e650fd 100644 --- a/ydb/core/protos/kqp.proto +++ b/ydb/core/protos/kqp.proto @@ -715,6 +715,7 @@ message TKqpTableSinkSettings { optional bool InconsistentTx = 8; // Write each batch in it's own single immediate tx optional EType Type = 9; optional NActorsProto.TActorId BufferActorId = 10; + optional int64 Priority = 11; } message TKqpStreamLookupSettings { From 1208a66886066a3ec09e2c3a9fda9782c0093cc3 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 25 Oct 2024 13:14:22 +0300 Subject: [PATCH 62/69] fix --- ydb/core/kqp/common/kqp_tx_manager.h | 2 +- .../kqp/executer_actor/kqp_data_executer.cpp | 14 +--- ydb/core/kqp/runtime/kqp_write_actor.cpp | 77 ++++++++----------- ydb/core/kqp/runtime/kqp_write_table.cpp | 11 +-- ydb/core/kqp/session_actor/kqp_query_state.h | 3 - .../kqp/session_actor/kqp_session_actor.cpp | 2 +- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 16 ++-- .../datashard/datashard_write_operation.cpp | 2 + 8 files changed, 54 insertions(+), 73 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index 2fb48ecc6562..25d65c6c1368 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -71,7 +71,7 @@ class IKqpTransactionManager { struct TPrepareInfo { const THashSet& 
SendingShards; const THashSet& ReceivingShards; - std::optional Arbiter; // TODO: support volatile + std::optional Arbiter; // TODO: support non-volatile std::optional ArbiterColumnShard; // TODO: support columnshard&topic }; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index ae25bd93246b..974c8e2d3365 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -238,7 +238,6 @@ class TKqpDataExecuter : public TKqpExecuterBaseAdd(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); if (TxManager) { - LOG_D("TEST::ADDDSHARD>> R " << lock.GetDataShard() << " " << stageInfo.Meta.TablePath << " " << TxManager->GetShardsCount()); TxManager->AddShard(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::READ); TxManager->AddLock(lock.GetDataShard(), lock); @@ -286,13 +285,11 @@ class TKqpDataExecuter : public TKqpExecuterBaseSender); + LOG_I("Timed out on waiting for Compute Actors to finish - forcing shutdown. 
Sender: " << ev->Sender); if (ev->Sender == SelfId()) { PassAway(); diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 2881d7b9e4f1..1f0bb8e8781a 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -200,8 +200,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void Bootstrap() { - LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; - CA_LOG_D("New TKqpTableWriteActor for table `" << TablePath << "` (" << TableId << ")."); + LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", Table: `" << TablePath << "` (" << TableId << "), "<< LogPrefix; ResolveTable(); Become(&TKqpTableWriteActor::StateProcessing); } @@ -253,7 +252,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { operationType, std::move(columnsMetadata), priority); - CA_LOG_D("Open write to " << TableId << " token=" << token); + CA_LOG_D("Open: token=" << token); return token; } @@ -261,7 +260,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); YQL_ENSURE(!Closed); YQL_ENSURE(ShardedWriteController); - CA_LOG_D("Write to " << TableId << " token=" << token); + CA_LOG_D("Write: token=" << token); try { ShardedWriteController->Write(token, data); UpdateShards(); @@ -275,7 +274,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { void Close(TWriteToken token) { YQL_ENSURE(!Closed); YQL_ENSURE(ShardedWriteController); - CA_LOG_D("Close write to " << TableId << " token=" << token); + CA_LOG_D("Close: token=" << token); try { ShardedWriteController->Close(token); UpdateShards(); @@ -297,7 +296,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { void UpdateShards() { // TODO: Maybe there are better ways to initialize new shards... 
for (const auto& shardInfo : ShardedWriteController->GetPendingShards()) { - CA_LOG_D("TEST::ADDDSHARD>> W " << shardInfo.ShardId << " " << TablePath << " " << TxManager->GetShardsCount()); TxManager->AddShard(shardInfo.ShardId, IsOlap(), TablePath); TxManager->AddAction(shardInfo.ShardId, IKqpTransactionManager::EAction::WRITE); if (shardInfo.HasRead) { @@ -368,7 +366,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { if (ResolveAttempts++ >= MessageSettings.MaxResolveAttempts) { CA_LOG_E(TStringBuilder() - << "Too many table resolve attempts for table " << TableId << "."); + << "Too many table resolve attempts for table `" << TablePath << "` (" << TableId << ")."); RuntimeError( TStringBuilder() << "Too many table resolve attempts for table `" << TablePath << "`.", @@ -672,7 +670,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() << ", TabletId=" << ev->Get()->Record.GetOrigin() << ", Cookie=" << ev->Cookie - << " MODETEST= " << static_cast(Mode) + << ", Mode=" << static_cast(Mode) << ", Locks=" << [&]() { TStringBuilder builder; for (const auto& lock : ev->Get()->Record.GetTxLocks()) { @@ -688,7 +686,8 @@ class TKqpTableWriteActor : public TActorBootstrapped { NYql::TIssues issues; issues.AddIssue(*TxManager->GetLockIssue()); RuntimeError( - TStringBuilder() << "Transaction locks invalidated.", + TStringBuilder() << "Transaction locks invalidated. 
Table `" + << TablePath << "`.", NYql::NDqProto::StatusIds::ABORTED, issues); return; @@ -767,8 +766,6 @@ class TKqpTableWriteActor : public TActorBootstrapped { const bool isPrepare = metadata->IsFinal && Mode == EMode::PREPARE; const bool isImmediateCommit = metadata->IsFinal && Mode == EMode::IMMEDIATE_COMMIT; - Y_ABORT_UNLESS(!metadata->IsFinal || isPrepare || isImmediateCommit || Mode == EMode::WRITE); // TODO: delete - auto evWrite = std::make_unique(); evWrite->Record.SetTxMode(isPrepare @@ -980,8 +977,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu YQL_ENSURE(Settings.GetPriority() == 0); WriteToken = WriteTableActor->Open(GetOperation(Settings.GetType()), std::move(columnsMetadata), Settings.GetPriority()); WaitingForTableActor = true; - - CA_LOG_D("New TKqpDirectWriteActor for table `" << Settings.GetTable().GetPath() << "` (" << TableId << ")."); } static constexpr char ActorName[] = "KQP_DIRECT_WRITE_ACTOR"; @@ -1074,7 +1069,6 @@ class TKqpDirectWriteActor : public TActorBootstrapped, pu void PassAway() override { if (WriteTableActor) { WriteTableActor->Terminate(); - //WriteTableActor->PassAway(); } TActorBootstrapped::PassAway(); } @@ -1198,7 +1192,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub WRITING, // Allow to write data to buffer. FLUSHING, // Force flush (for uncommitted changes visibility). Can't accept any writes in this state. PREPARING, // Do preparation for commit. All writers are closed. New writes wouldn't be accepted. - COMMITTING, // Do immediate commit (single shard). All writers are closed. New writes wouldn't be accepted. + COMMITTING, // Do commit. All writers are closed. New writes wouldn't be accepted. ROLLINGBACK, // Do rollback. New writes wouldn't be accepted. 
FINISHED, }; @@ -1356,18 +1350,16 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub if (GetTotalFreeSpace() >= item.DataSize) { auto result = std::make_unique(); result->Token = AckQueue.front().Token; - CA_LOG_D("ProcessAckQueue ACK" << AckQueue.front().ForwardActorId); Send(AckQueue.front().ForwardActorId, result.release()); AckQueue.pop(); } else { - Y_ABORT_UNLESS(false); + YQL_ENSURE(false); return; } } } void ProcessWrite() { - Y_ABORT_UNLESS(GetTotalFreeSpace() > 0); // TODO: delete const bool needToFlush = GetTotalFreeSpace() <= 0 || State == EState::FLUSHING || State == EState::PREPARING @@ -1375,7 +1367,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub || State == EState::ROLLINGBACK; if (needToFlush) { - CA_LOG_D("DO FLUSH"); + CA_LOG_D("Flush data"); for (auto& [_, info] : WriteInfos) { if (info.WriteTableActor->IsReady()) { info.WriteTableActor->Flush(); @@ -1385,11 +1377,11 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Flush() { - CA_LOG_D("Start FLUSHING"); + CA_LOG_D("Start flush"); YQL_ENSURE(State == EState::WRITING); State = EState::FLUSHING; for (auto& [_, queue] : DataQueues) { - Y_ABORT_UNLESS(queue.empty()); + YQL_ENSURE(queue.empty()); } for (auto& [_, info] : WriteInfos) { info.WriteTableActor->FlushBuffers(); @@ -1398,11 +1390,11 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Prepare(const ui64 txId) { - CA_LOG_D("Start PREPARE"); + CA_LOG_D("Start prepare for distributed commit"); YQL_ENSURE(State == EState::WRITING); State = EState::PREPARING; for (auto& [_, queue] : DataQueues) { - Y_ABORT_UNLESS(queue.empty()); + YQL_ENSURE(queue.empty()); } TxId = txId; for (auto& [_, info] : WriteInfos) { @@ -1414,13 +1406,12 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void ImmediateCommit() { - CA_LOG_D("Start COMMIT I"); + CA_LOG_D("Start immediate commit"); YQL_ENSURE(State == EState::WRITING); State = EState::COMMITTING; for (auto& [_, queue] : DataQueues) 
{ - Y_ABORT_UNLESS(queue.empty()); + YQL_ENSURE(queue.empty()); } - CA_LOG_D("Start immediate commit"); for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetImmediateCommit(); } @@ -1429,13 +1420,12 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void DistributedCommit() { - CA_LOG_D("Start COMMIT D"); + CA_LOG_D("Start distributed commit with TxId=" << *TxId); YQL_ENSURE(State == EState::PREPARING); State = EState::COMMITTING; for (auto& [_, queue] : DataQueues) { - Y_ABORT_UNLESS(queue.empty()); + YQL_ENSURE(queue.empty()); } - CA_LOG_D("Start distributed commit TxId" << *TxId); for (auto& [_, info] : WriteInfos) { info.WriteTableActor->SetDistributedCommit(); } @@ -1443,7 +1433,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } void Rollback() { - CA_LOG_D("Start ROLLBACK"); + CA_LOG_D("Start rollback"); State = EState::ROLLINGBACK; SendToExternalShards(true); } @@ -1522,7 +1512,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub } //TODO: NDataIntegrity - CA_LOG_D("Execute planned transaction, coordinator: " << commitInfo.Coordinator << " volitale=" << ((transaction.GetFlags() & TEvTxProxy::TEvProposeTransaction::FlagVolatile) != 0) << " shards=" << affectedSet.size()); + CA_LOG_D("Execute planned transaction, coordinator: " << commitInfo.Coordinator + << ", volitale: " << ((transaction.GetFlags() & TEvTxProxy::TEvProposeTransaction::FlagVolatile) != 0) + << ", shards: " << affectedSet.size()); Send(MakePipePerNodeCacheID(false), new TEvPipeCache::TEvForward(ev.Release(), commitInfo.Coordinator, /* subscribe */ true)); } @@ -1580,14 +1572,12 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub for (auto& [_, info] : WriteInfos) { if (info.WriteTableActor) { info.WriteTableActor->Terminate(); - //info.WriteTableActor->PassAway(); } } TActorBootstrapped::PassAway(); } void Handle(TEvTxProxy::TEvProposeTransactionStatus::TPtr &ev) { - // TODO: move it to commit actor??? 
TEvTxProxy::TEvProposeTransactionStatus* res = ev->Get(); CA_LOG_D("Got transaction status, status: " << res->GetStatus()); @@ -1607,7 +1597,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusDeclined: case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusDeclinedNoSpace: case TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusRestarting: - // TODO: CancelProposal + // TODO: CancelProposal??? ReplyErrorAndDie(TStringBuilder() << "Failed to plan transaction, status: " << res->GetStatus(), NYql::NDqProto::StatusIds::UNAVAILABLE, {}); break; @@ -1800,7 +1790,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void ProcessWritePreparedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { if (State != EState::PREPARING) { - CA_LOG_D("ProcessWritePreparedShard: ignored"); + CA_LOG_D("Ignored write prepared event."); return; } const auto& record = ev->Get()->Record; @@ -1821,7 +1811,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void ProcessWriteCompletedShard(NKikimr::NEvents::TDataEvents::TEvWriteResult::TPtr& ev) { if (State != EState::COMMITTING) { - CA_LOG_D("ProcessWriteCompletedShard: ignored"); + CA_LOG_D("Ignored write completed event."); return; } CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() @@ -1844,7 +1834,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void OnPrepared(IKqpTransactionManager::TPrepareResult&& preparedInfo, ui64 dataSize) override { if (State != EState::PREPARING) { - CA_LOG_D("OnPrepared: ignored"); return; } Y_UNUSED(preparedInfo, dataSize); @@ -1859,10 +1848,8 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void OnCommitted(ui64 shardId, ui64 dataSize) override { if (State != EState::COMMITTING) { - CA_LOG_D("OnCommitted: ignored"); return; } - CA_LOG_D("Recv committed"); Y_UNUSED(shardId, dataSize); if (TxManager->ConsumeCommitResult(shardId)) { 
CA_LOG_D("Committed"); @@ -1994,7 +1981,7 @@ class TKqpForwardWriteActor : public TActorBootstrapped, } void Handle(TEvBufferWriteResult::TPtr& result) { - CA_LOG_D("TKqpForwardWriteActor Recv from=" << BufferActorId); + CA_LOG_D("TKqpForwardWriteActor recieve EvBufferWriteResult from " << BufferActorId); EgressStats.Bytes += DataSize; EgressStats.Chunks++; EgressStats.Splits++; @@ -2008,11 +1995,11 @@ class TKqpForwardWriteActor : public TActorBootstrapped, } if (Closed) { - CA_LOG_D("TKqpForwardWriteActor FINISH"); + CA_LOG_D("Finished"); Callbacks->OnAsyncOutputFinished(GetOutputIndex()); return; } - CA_LOG_D("TKqpForwardWriteActor RESUME free=" << GetFreeSpace()); + CA_LOG_D("Resume with freeSpace=" << GetFreeSpace()); Callbacks->ResumeExecution(); } @@ -2047,7 +2034,7 @@ class TKqpForwardWriteActor : public TActorBootstrapped, }; } - CA_LOG_D("TKqpForwardWriteActor SEND data=" << DataSize << " closed=" << Closed); + CA_LOG_D("Send data=" << DataSize << ", closed=" << Closed << ", bufferActorId=" << BufferActorId); AFL_ENSURE(Send(BufferActorId, ev.release())); } @@ -2081,14 +2068,14 @@ class TKqpForwardWriteActor : public TActorBootstrapped, Data->emplace_back(std::move(data)); DataSize += size; - CA_LOG_D("TKqpForwardWriteActor ADD DATA : " << size << " / " << DataSize); + CA_LOG_D("Add data: " << size << " / " << DataSize); if (Closed || GetFreeSpace() <= 0) { WriteToBuffer(); } } void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) { - CA_LOG_E("TKqpForwardWriteActor ERROR : " << message); + CA_LOG_E("RuntimeError: " << message); NYql::TIssue issue(message); for (const auto& i : subIssues) { issue.AddSubIssue(MakeIntrusive(i)); diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 0792ccb3ba8e..ec1c4137dcd9 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -178,7 +178,7 @@ 
std::vector> BuildBatchBuilderColumns( result.reserve(columns.size()); for (const auto& column : columns) { if (inputColumnsIds.contains(column.GetId())) { - Y_ABORT_UNLESS(column.HasTypeId()); + YQL_ENSURE(column.HasTypeId()); auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(column.GetTypeId(), column.HasTypeInfo() ? &column.GetTypeInfo() : nullptr); result.emplace_back(column.GetName(), typeInfoMod.TypeInfo); @@ -648,7 +648,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { } ui64 AddRow(TRowWithData&& rowWithData) { - Y_ABORT_UNLESS(rowWithData.Cells.size() == ColumnCount); + YQL_ENSURE(rowWithData.Cells.size() == ColumnCount); ui64 newMemory = 0; for (const auto& cell : rowWithData.Cells) { newMemory += cell.Size(); @@ -909,6 +909,7 @@ class TShardsInfo { void MakeNextBatches(i64 maxDataSize, ui64 maxCount) { YQL_ENSURE(BatchesInFlight == 0); YQL_ENSURE(!IsEmpty()); + YQL_ENSURE(maxCount != 0); i64 dataSize = 0; // For columnshard batch can be slightly larger than the limit. 
while (BatchesInFlight < maxCount @@ -917,8 +918,8 @@ class TShardsInfo { dataSize += GetBatch(BatchesInFlight).GetMemory(); ++BatchesInFlight; } - YQL_ENSURE(BatchesInFlight == Batches.size() || dataSize + GetBatch(BatchesInFlight).GetMemory() > maxDataSize || BatchesInFlight >= maxCount); - Y_ABORT_UNLESS(BatchesInFlight == Batches.size()); // TODO: delete + YQL_ENSURE(BatchesInFlight != 0); + YQL_ENSURE(BatchesInFlight == Batches.size() || BatchesInFlight >= maxCount || dataSize + GetBatch(BatchesInFlight).GetMemory() > maxDataSize); } const TBatchWithMetadata& GetBatch(size_t index) const { @@ -1321,7 +1322,7 @@ class TShardedWriteController : public IShardedWriteController { if (writeInfo.Serializer) { total += writeInfo.Serializer->GetMemory(); } else { - Y_ABORT_UNLESS(writeInfo.Closed); + YQL_ENSURE(writeInfo.Closed); } } return total; diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index a36d1ecc72ad..8228984787db 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -412,10 +412,7 @@ class TKqpQueryState : public TNonCopyable { const auto& phyQuery = PreparedQuery->GetPhysicalQuery(); auto tx = PreparedQuery->GetPhyTxOrEmpty(CurrentTx); - //Cerr << ">>> CURRENT " << CurrentTx << Endl; - if (TxCtx->CanDeferEffects()) { - //Cerr << ">>> DEFER " << (tx != nullptr) << " " << (tx && tx->GetHasEffects()) << " " << !TxCtx->HasOlapTable << Endl; // At current time sinks require separate tnx with commit. 
while (tx && tx->GetHasEffects() && !TxCtx->HasOlapTable) { QueryData->CreateKqpValueMap(tx); diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index aa55a4de6d92..37a1048dd5ee 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -1344,7 +1344,7 @@ class TKqpSessionActor : public TActorBootstrapped { auto ev = std::make_unique(exId); Send(MakeTxProxyID(), ev.release()); if (!isRollback) { - Y_ABORT_UNLESS(!ExecuterId); + YQL_ENSURE(!ExecuterId); } ExecuterId = exId; } diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index 7594e72f913a..ab61cc1dd041 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -4301,7 +4301,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { .ExtractValueSync() .GetTransaction(); UNIT_ASSERT(tx.IsActive()); - /*{ + { auto prepareResult = session2.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard` (Col1, Col2, Col3) VALUES (10u, "test1", 10), (20u, "test2", 11), (2147483647u, "test3", 12), (2147483640u, NULL, 13); @@ -4326,7 +4326,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { CompareYson( output, R"([[4u]])"); - }*/ + } { auto prepareResult = session2.ExecuteQuery(R"( @@ -4348,12 +4348,12 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); } - //{ - // auto prepareResult = client.ExecuteQuery(R"( - // REPLACE INTO `/Root/DataShard2` SELECT * FROM `/Root/DataShard`; - // )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); - // UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); - //} + { + auto prepareResult = client.ExecuteQuery(R"( + REPLACE INTO `/Root/DataShard2` SELECT * FROM `/Root/DataShard`; + )", 
NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } } Y_UNIT_TEST(ReadDatashardAndColumnshard) { diff --git a/ydb/core/tx/datashard/datashard_write_operation.cpp b/ydb/core/tx/datashard/datashard_write_operation.cpp index 5581882efdbb..0d67c7f02350 100644 --- a/ydb/core/tx/datashard/datashard_write_operation.cpp +++ b/ydb/core/tx/datashard/datashard_write_operation.cpp @@ -382,6 +382,8 @@ TString TWriteOperation::GetTxBody() const { } void TWriteOperation::SetTxBody(const TString& txBody) { + Y_ABORT_UNLESS(!WriteRequest); + NKikimrTxDataShard::TSerializedEvent proto; const bool success = proto.ParseFromString(txBody); Y_ABORT_UNLESS(success); From 0956faa84456bc50039998519c049324196aeea7 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 28 Oct 2024 14:21:45 +0300 Subject: [PATCH 63/69] fix --- ydb/core/kqp/common/kqp_tx_manager.cpp | 38 ++++++++++++++++-------- ydb/core/kqp/common/kqp_tx_manager.h | 2 ++ ydb/core/kqp/runtime/kqp_write_actor.cpp | 32 ++++++++++++-------- 3 files changed, 46 insertions(+), 26 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 931978a7242f..fa1689c269d6 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -52,8 +52,8 @@ class TKqpTransactionManager : public IKqpTransactionManager { } bool AddLock(ui64 shardId, const NKikimrDataEvents::TLock& lockProto) override { - TKqpLock lock(lockProto); Y_ABORT_UNLESS(State == ETransactionState::COLLECTING); + TKqpLock lock(lockProto); bool isError = (lock.Proto.GetCounter() >= NKikimr::TSysTables::TLocksTable::TLock::ErrorMin); bool isInvalidated = (lock.Proto.GetCounter() == NKikimr::TSysTables::TLocksTable::TLock::ErrorAlreadyBroken) || (lock.Proto.GetCounter() == 
NKikimr::TSysTables::TLocksTable::TLock::ErrorBroken); @@ -88,18 +88,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { LocksIssue = YqlIssue(NYql::TPosition(), NYql::TIssuesIds::KIKIMR_LOCKS_ACQUIRE_FAILURE); return false; } else if (isInvalidated) { - TStringBuilder message; - message << "Transaction locks invalidated. Tables: "; - bool first = true; - // TODO: add error by pathid - for (const auto& path : shardInfo.Pathes) { - if (!first) { - message << ", "; - first = false; - } - message << "`" << path << "`"; - } - LocksIssue = YqlIssue(NYql::TPosition(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message); + MakeLocksIssue(shardInfo); return false; } AFL_ENSURE(false); @@ -108,6 +97,14 @@ class TKqpTransactionManager : public IKqpTransactionManager { return true; } + void BreakLock(ui64 shardId) override { + if (LocksIssue) { + return; + } + auto& shardInfo = ShardsInfo.at(shardId); + MakeLocksIssue(shardInfo); + } + TTableInfo GetShardTableInfo(ui64 shardId) const override { const auto& info = ShardsInfo.at(shardId); return TTableInfo{ @@ -359,6 +356,21 @@ class TKqpTransactionManager : public IKqpTransactionManager { THashSet Pathes; }; + void MakeLocksIssue(const TShardInfo& shardInfo) { + TStringBuilder message; + message << "Transaction locks invalidated. 
Tables: "; + bool first = true; + // TODO: add error by pathid + for (const auto& path : shardInfo.Pathes) { + if (!first) { + message << ", "; + } + first = false; // cleared after the first path so later paths get a separator + message << "`" << path << "`"; + } + LocksIssue = YqlIssue(NYql::TPosition(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message); + } + THashSet ShardsIds; THashMap ShardsInfo; std::unordered_set TablePathes; diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index 25d65c6c1368..be2cf592b7cf 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -39,6 +39,8 @@ class IKqpTransactionManager { virtual void AddAction(ui64 shardId, ui8 action) = 0; virtual bool AddLock(ui64 shardId, const NKikimrDataEvents::TLock& lock) = 0; + virtual void BreakLock(ui64 shardId) = 0; + virtual TTableInfo GetShardTableInfo(ui64 shardId) const = 0; virtual TVector GetLocks() const = 0; diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 1f0bb8e8781a..51de104f5802 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -496,7 +496,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { << getIssues().ToOneLineString()); RuntimeError( TStringBuilder() << "Unspecified error for table `" - << SchemeEntry->TableId.PathId.ToString() << "`. " + << TablePath << "`. " << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::UNSPECIFIED, getIssues()); @@ -518,7 +518,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { << getIssues().ToOneLineString()); RuntimeError( TStringBuilder() << "Aborted for table `" - << SchemeEntry->TableId.PathId.ToString() << "`. " + << TablePath << "`. 
" << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::ABORTED, getIssues()); @@ -538,7 +538,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } else { RuntimeError( TStringBuilder() << "Internal error for table `" - << SchemeEntry->TableId.PathId.ToString() << "`. " + << TablePath << "`. " << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::INTERNAL_ERROR, getIssues()); @@ -554,7 +554,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { RuntimeError( TStringBuilder() << "Disk space exhausted for table `" - << SchemeEntry->TableId.PathId.ToString() << "`. " + << TablePath << "`. " << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::PRECONDITION_FAILED, getIssues()); @@ -571,7 +571,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { if (!InconsistentTx) { RuntimeError( TStringBuilder() << "Tablet " << ev->Get()->Record.GetOrigin() << " is overloaded. Table `" - << SchemeEntry->TableId.PathId.ToString() << "`. " + << TablePath << "`. " << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::OVERLOADED, getIssues()); @@ -586,7 +586,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { << getIssues().ToOneLineString()); RuntimeError( TStringBuilder() << "Cancelled request to table `" - << SchemeEntry->TableId.PathId.ToString() << "`." + << TablePath << "`." << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::CANCELLED, getIssues()); @@ -600,7 +600,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { << getIssues().ToOneLineString()); RuntimeError( TStringBuilder() << "Bad request. Table `" - << SchemeEntry->TableId.PathId.ToString() << "`. " + << TablePath << "`. " << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::BAD_REQUEST, getIssues()); @@ -618,7 +618,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } else { RuntimeError( TStringBuilder() << "Scheme changed. Table `" - << SchemeEntry->TableId.PathId.ToString() << "`. " + << TablePath << "`. 
" << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::SCHEME_ERROR, getIssues()); @@ -631,9 +631,12 @@ class TKqpTableWriteActor : public TActorBootstrapped { << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "." << getIssues().ToOneLineString()); + + TxManager->BreakLock(ev->Get()->Record.GetOrigin()); + YQL_ENSURE(TxManager->BrokenLocks()); RuntimeError( TStringBuilder() << "Transaction locks invalidated. Table `" - << SchemeEntry->TableId.PathId.ToString() << "`. " + << TablePath << "`. " << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::ABORTED, getIssues()); @@ -1731,7 +1734,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub << getIssues().ToOneLineString()); // TODO: support waiting ReplyErrorAndDie( - TStringBuilder() << "Tablet " << ev->Get()->Record.GetOrigin() << " is overloaded. Table. " + TStringBuilder() << "Tablet " << ev->Get()->Record.GetOrigin() << " is overloaded." << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::OVERLOADED, getIssues()); @@ -1755,7 +1758,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub << " Sink=" << this->SelfId() << "." << getIssues().ToOneLineString()); ReplyErrorAndDie( - TStringBuilder() << "Bad request. Table. " + TStringBuilder() << "Bad request. " << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::BAD_REQUEST, getIssues()); @@ -1767,7 +1770,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub << " Sink=" << this->SelfId() << "." << getIssues().ToOneLineString()); ReplyErrorAndDie( - TStringBuilder() << "Scheme changed. Table. " + TStringBuilder() << "Scheme changed. " << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::SCHEME_ERROR, getIssues()); @@ -1778,8 +1781,11 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub << " ShardID=" << ev->Get()->Record.GetOrigin() << "," << " Sink=" << this->SelfId() << "." 
<< getIssues().ToOneLineString()); + + TxManager->BreakLock(ev->Get()->Record.GetOrigin()); + YQL_ENSURE(TxManager->BrokenLocks()); ReplyErrorAndDie( - TStringBuilder() << "Transaction locks invalidated.. " + TStringBuilder() << "Transaction locks invalidated." << getIssues().ToOneLineString(), NYql::NDqProto::StatusIds::ABORTED, getIssues()); From 45409fae4d5689a227fc220326e605063f4715e5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 28 Oct 2024 14:50:35 +0300 Subject: [PATCH 64/69] fix --- ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp index bc4f31996137..aec42d54998e 100644 --- a/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp @@ -141,14 +141,26 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - result = session.ExecuteQuery(Q_(R"( - UPDATE `/Root/KV` SET Value = "third" WHERE Key = 4; - )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + if (!GetIsOlap()) { + result = session.ExecuteQuery(Q_(R"( + UPDATE `/Root/KV` SET Value = "third" WHERE Key = 4; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - auto commitResult = tx->Commit().ExtractValueSync(); + auto commitResult = tx->Commit().ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::NOT_FOUND, commitResult.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::ABORTED, commitResult.GetIssues().ToString()); + } else { + // Olap works without defer + result = 
session.ExecuteQuery(Q_(R"( + UPDATE `/Root/KV` SET Value = "third" WHERE Key = 4; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + + auto commitResult = tx->Commit().ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::NOT_FOUND, commitResult.GetIssues().ToString()); + } } }; @@ -177,6 +189,7 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { auto result = session.ExecuteQuery(Q_(R"( INSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "New"); + SELECT COUNT(*) FROM `/Root/KV`; )"), TTxControl::Tx(tx.GetId())).ExtractValueSync(); result.GetIssues().PrintTo(Cerr); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::BAD_REQUEST, result.GetIssues().ToString()); From 335c588502a7ea798ba364dc30d7badde24bf6cc Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 28 Oct 2024 15:54:01 +0300 Subject: [PATCH 65/69] fix --- ydb/core/kqp/common/kqp_tx_manager.cpp | 9 ++++++++- ydb/core/kqp/common/kqp_tx_manager.h | 2 ++ ydb/core/kqp/runtime/kqp_write_actor.cpp | 4 ++-- ydb/core/kqp/runtime/kqp_write_table.cpp | 6 +++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index fa1689c269d6..486718291e59 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -1,5 +1,6 @@ #include "kqp_tx_manager.h" +#include #include namespace NKikimr { @@ -166,6 +167,12 @@ class TKqpTransactionManager : public IKqpTransactionManager { return GetShardsCount() == 1; } + bool HasOlapTable() const override { + return std::any_of(std::begin(ShardsInfo), std::end(ShardsInfo), [](const auto& element) { + return element.second.IsOlap; + }); + } + bool IsEmpty() const override { return GetShardsCount() == 0; } @@ -304,7 +311,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { shardInfo.State = EShardState::EXECUTING; } - 
AFL_ENSURE(ReceivingShards.empty() || !IsSingleShard()); + AFL_ENSURE(ReceivingShards.empty() || !IsSingleShard() || HasOlapTable()); } TCommitInfo GetCommitInfo() override { diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index be2cf592b7cf..b868cd35e8ff 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -54,6 +54,8 @@ class IKqpTransactionManager { virtual bool IsReadOnly() const = 0; virtual bool IsSingleShard() const = 0; + virtual bool HasOlapTable() const = 0; + virtual bool IsEmpty() const = 0; virtual bool HasLocks() const = 0; diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 51de104f5802..6feee5aef3a5 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -1468,7 +1468,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub FillEvWritePrepare(evWrite.get(), shardId, *TxId, TxManager); } - CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", isPrepare=" << !isRollback << ", isImmediateCommit=" << isRollback << ", TxId=" << evWrite->Record.GetTxId() + CA_LOG_D("Send EvWrite (external) to ShardID=" << shardId << ", isPrepare=" << !isRollback << ", isImmediateCommit=" << isRollback << ", TxId=" << evWrite->Record.GetTxId() << ", LockTxId=" << evWrite->Record.GetLockTxId() << ", LockNodeId=" << evWrite->Record.GetLockNodeId() << ", Locks= " << [&]() { TStringBuilder builder; @@ -1631,7 +1631,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub Rollback(); State = EState::FINISHED; Send(ExecuterActorId, new TEvKqpBuffer::TEvResult{}); - } else if (TxManager->IsSingleShard() && !WriteInfos.empty()) { + } else if (TxManager->IsSingleShard() && !TxManager->HasOlapTable() && !WriteInfos.empty()) { TxManager->StartExecute(); ImmediateCommit(); } else { diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 
ec1c4137dcd9..6f2709bc2c75 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -1069,6 +1069,7 @@ class TShardsInfo { class TShardedWriteController : public IShardedWriteController { public: void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) override { + IsOlap = true; SchemeEntry = schemeEntry; BeforePartitioningChanged(); for (TWriteToken token = 0; token < CurrentWriteToken; ++token) { @@ -1083,6 +1084,7 @@ class TShardedWriteController : public IShardedWriteController { void OnPartitioningChanged( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry) override { + IsOlap = false; SchemeEntry = schemeEntry; PartitionsEntry = std::move(partitionsEntry); BeforePartitioningChanged(); @@ -1427,9 +1429,10 @@ class TShardedWriteController : public IShardedWriteController { void BuildBatchesForShard(TShardsInfo::TShardInfo& shard) { if (shard.GetBatchesInFlight() == 0) { + YQL_ENSURE(IsOlap != std::nullopt); shard.MakeNextBatches( Settings.MemoryLimitPerMessage, - Settings.MaxBatchesPerMessage); + (*IsOlap) ? 
1 : Settings.MaxBatchesPerMessage); } } @@ -1464,6 +1467,7 @@ class TShardedWriteController : public IShardedWriteController { std::optional SchemeEntry; std::optional PartitionsEntry; + std::optional IsOlap; }; } From 64c1cd3f64080516d80d21842be6c0522408b81d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Oct 2024 14:20:53 +0300 Subject: [PATCH 66/69] fix --- ydb/core/kqp/common/kqp_tx_manager.cpp | 33 ++++++++++++++++--- ydb/core/kqp/common/kqp_tx_manager.h | 4 +-- .../kqp/executer_actor/kqp_data_executer.cpp | 5 ++- .../kqp/executer_actor/kqp_executer_impl.h | 4 ++- ydb/core/kqp/expr_nodes/kqp_expr_nodes.json | 3 +- ydb/core/kqp/host/kqp_type_ann.cpp | 2 +- ydb/core/kqp/opt/kqp_opt_effects.cpp | 15 ++++++--- .../kqp/query_compiler/kqp_query_compiler.cpp | 1 + ydb/core/kqp/runtime/kqp_write_actor.cpp | 12 +++++-- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 2 +- ydb/core/protos/kqp.proto | 2 ++ 11 files changed, 63 insertions(+), 20 deletions(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index 486718291e59..b837714e04a7 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -38,6 +38,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { ShardsIds.insert(shardId); auto& shardInfo = ShardsInfo[shardId]; shardInfo.IsOlap = isOlap; + HasOlapTableShard |= isOlap; const auto [stringsIter, _] = TablePathes.insert(path); const TStringBuf pathBuf = *stringsIter; @@ -168,9 +169,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { } bool HasOlapTable() const override { - return std::any_of(std::begin(ShardsInfo), std::end(ShardsInfo), [](const auto& element) { - return element.second.IsOlap; - }); + return HasOlapTableShard; } bool IsEmpty() const override { @@ -187,7 +186,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { } bool IsVolatile() const override { - return true; + return !HasOlapTable(); } bool HasSnapshot() 
const override { @@ -219,15 +218,24 @@ class TKqpTransactionManager : public IKqpTransactionManager { AFL_ENSURE(State == ETransactionState::COLLECTING); AFL_ENSURE(!IsReadOnly()); + THashSet sendingColumnShardsSet; + THashSet receivingColumnShardsSet; + for (auto& [shardId, shardInfo] : ShardsInfo) { if ((shardInfo.Flags & EAction::WRITE)) { ReceivingShards.insert(shardId); if (IsVolatile()) { SendingShards.insert(shardId); } + if (shardInfo.IsOlap) { + sendingColumnShardsSet.insert(shardId); + } } if (!shardInfo.Locks.empty()) { SendingShards.insert(shardId); + if (shardInfo.IsOlap) { + receivingColumnShardsSet.insert(shardId); + } } AFL_ENSURE(shardInfo.State == EShardState::PROCESSING); @@ -255,6 +263,18 @@ class TKqpTransactionManager : public IKqpTransactionManager { } } + if (!receivingColumnShardsSet.empty() || !sendingColumnShardsSet.empty()) { + AFL_ENSURE(!IsVolatile()); + const auto& shards = receivingColumnShardsSet.empty() + ? sendingColumnShardsSet + : receivingColumnShardsSet; + + const ui32 index = RandomNumber(shards.size()); + auto arbiterIterator = std::begin(shards); + std::advance(arbiterIterator, index); + ArbiterColumnShard = *arbiterIterator; + } + ShardsToWaitPrepare = ShardsIds; MinStep = std::numeric_limits::min(); @@ -271,6 +291,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { .SendingShards = SendingShards, .ReceivingShards = ReceivingShards, .Arbiter = Arbiter, + .ArbiterColumnShard = ArbiterColumnShard, }; return result; @@ -311,7 +332,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { shardInfo.State = EShardState::EXECUTING; } - AFL_ENSURE(ReceivingShards.empty() || !IsSingleShard() || HasOlapTable()); + AFL_ENSURE(ReceivingShards.empty() || !IsSingleShard()); } TCommitInfo GetCommitInfo() override { @@ -384,11 +405,13 @@ class TKqpTransactionManager : public IKqpTransactionManager { bool ReadOnly = true; bool ValidSnapshot = false; + bool HasOlapTableShard = false; std::optional LocksIssue; 
THashSet SendingShards; THashSet ReceivingShards; std::optional Arbiter; + std::optional ArbiterColumnShard; THashSet ShardsToWaitPrepare; diff --git a/ydb/core/kqp/common/kqp_tx_manager.h b/ydb/core/kqp/common/kqp_tx_manager.h index b868cd35e8ff..03dd73ad034b 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.h +++ b/ydb/core/kqp/common/kqp_tx_manager.h @@ -75,8 +75,8 @@ class IKqpTransactionManager { struct TPrepareInfo { const THashSet& SendingShards; const THashSet& ReceivingShards; - std::optional Arbiter; // TODO: support non-volatile - std::optional ArbiterColumnShard; // TODO: support columnshard&topic + std::optional Arbiter; + std::optional ArbiterColumnShard; }; virtual TPrepareInfo GetPrepareTransactionInfo() = 0; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 974c8e2d3365..bfdfcb2bcabc 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -257,7 +257,10 @@ class TKqpDataExecuter : public TKqpExecuterBaseAddShard(lock.GetDataShard(), stageInfo.Meta.TableKind == ETableKind::Olap, stageInfo.Meta.TablePath); - TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::READ); + TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::WRITE); + if (info.GetHasRead()) { + TxManager->AddAction(lock.GetDataShard(), IKqpTransactionManager::EAction::READ); + } TxManager->AddLock(lock.GetDataShard(), lock); } } diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h b/ydb/core/kqp/executer_actor/kqp_executer_impl.h index 6e541c7e8594..522f8bc053c9 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h @@ -949,7 +949,9 @@ class TKqpExecuterBase : public TActorBootstrapped { settings.SetLockTxId(*lockTxId); settings.SetLockNodeId(SelfId().NodeId()); } - ActorIdToProto(BufferActorId, settings.MutableBufferActorId()); + if 
(!settings.GetInconsistentTx() && !settings.GetIsOlap()) { + ActorIdToProto(BufferActorId, settings.MutableBufferActorId()); + } output.SinkSettings.ConstructInPlace(); output.SinkSettings->PackFrom(settings); } else { diff --git a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json index be84a22fc5d5..b8f78803d853 100644 --- a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json +++ b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json @@ -549,7 +549,8 @@ {"Index": 1, "Name": "InconsistentWrite", "Type": "TCoAtom"}, {"Index": 2, "Name": "Mode", "Type": "TCoAtom"}, {"Index": 3, "Name": "Priority", "Type": "TCoAtom"}, - {"Index": 4, "Name": "Settings", "Type": "TCoNameValueTupleList", "Optional": true} + {"Index": 4, "Name": "TableType", "Type": "TCoAtom"}, + {"Index": 5, "Name": "Settings", "Type": "TCoNameValueTupleList", "Optional": true} ] }, { diff --git a/ydb/core/kqp/host/kqp_type_ann.cpp b/ydb/core/kqp/host/kqp_type_ann.cpp index 36249024fea4..c1eac4a75c47 100644 --- a/ydb/core/kqp/host/kqp_type_ann.cpp +++ b/ydb/core/kqp/host/kqp_type_ann.cpp @@ -1859,7 +1859,7 @@ TStatus AnnotateKqpSinkEffect(const TExprNode::TPtr& node, TExprContext& ctx) { } TStatus AnnotateTableSinkSettings(const TExprNode::TPtr& input, TExprContext& ctx) { - if (!EnsureMinMaxArgsCount(*input, 4, 5, ctx)) { + if (!EnsureMinMaxArgsCount(*input, 5, 6, ctx)) { return TStatus::Error; } input->SetTypeAnn(ctx.MakeType()); diff --git a/ydb/core/kqp/opt/kqp_opt_effects.cpp b/ydb/core/kqp/opt/kqp_opt_effects.cpp index 250e32e6b26d..2dd9dd91ee40 100644 --- a/ydb/core/kqp/opt/kqp_opt_effects.cpp +++ b/ydb/core/kqp/opt/kqp_opt_effects.cpp @@ -232,7 +232,7 @@ TCoAtomList BuildKeyColumnsList(const TKikimrTableDescription& table, TPositionH } TDqStage RebuildPureStageWithSink(TExprBase expr, const TKqpTable& table, - const bool allowInconsistentWrites, const TStringBuf mode, const i64 order, TExprContext& ctx) { + const bool allowInconsistentWrites, const TStringBuf mode, const 
i64 order, const bool isOlap, TExprContext& ctx) { Y_DEBUG_ABORT_UNLESS(IsDqPureExpr(expr)); return Build(ctx, expr.Pos()) @@ -258,6 +258,7 @@ TDqStage RebuildPureStageWithSink(TExprBase expr, const TKqpTable& table, : ctx.NewAtom(expr.Pos(), "false")) .Mode(ctx.NewAtom(expr.Pos(), mode)) .Priority(ctx.NewAtom(expr.Pos(), ToString(order))) + .TableType(ctx.NewAtom(expr.Pos(), isOlap ? "olap" : "oltp")) .Settings() .Build() .Build() @@ -307,13 +308,14 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const } sinkEffect = NeedSinks(table, kqpCtx) || (kqpCtx.IsGenericQuery() && settings.AllowInconsistentWrites); - const i64 priority = (table.Metadata->Kind == EKikimrTableKind::Olap) ? 0 : order; + const bool isOlap = (table.Metadata->Kind == EKikimrTableKind::Olap); + const i64 priority = isOlap ? 0 : order; if (IsDqPureExpr(node.Input())) { if (sinkEffect) { stageInput = RebuildPureStageWithSink( node.Input(), node.Table(), - settings.AllowInconsistentWrites, settings.Mode, priority, ctx); + settings.AllowInconsistentWrites, settings.Mode, priority, isOlap, ctx); effect = Build(ctx, node.Pos()) .Stage(stageInput.Cast().Ptr()) .SinkIndex().Build("0") @@ -355,6 +357,7 @@ bool BuildUpsertRowsEffect(const TKqlUpsertRows& node, TExprContext& ctx, const : ctx.NewAtom(node.Pos(), "false")) .Mode(ctx.NewAtom(node.Pos(), settings.Mode)) .Priority(ctx.NewAtom(node.Pos(), ToString(priority))) + .TableType(ctx.NewAtom(node.Pos(), isOlap ? "olap" : "oltp")) .Settings() .Build() .Build() @@ -455,12 +458,13 @@ bool BuildDeleteRowsEffect(const TKqlDeleteRows& node, TExprContext& ctx, const { const auto& table = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, node.Table().Path()); sinkEffect = NeedSinks(table, kqpCtx); - const i64 priority = (table.Metadata->Kind == EKikimrTableKind::Olap) ? 0 : order; + const bool isOlap = (table.Metadata->Kind == EKikimrTableKind::Olap); + const i64 priority = isOlap ? 
0 : order; if (IsDqPureExpr(node.Input())) { if (sinkEffect) { const auto keyColumns = BuildKeyColumnsList(table, node.Pos(), ctx); - stageInput = RebuildPureStageWithSink(node.Input(), node.Table(), false, "delete", priority, ctx); + stageInput = RebuildPureStageWithSink(node.Input(), node.Table(), false, "delete", priority, isOlap, ctx); effect = Build(ctx, node.Pos()) .Stage(stageInput.Cast().Ptr()) .SinkIndex().Build("0") @@ -498,6 +502,7 @@ bool BuildDeleteRowsEffect(const TKqlDeleteRows& node, TExprContext& ctx, const .InconsistentWrite(ctx.NewAtom(node.Pos(), "false")) .Mode(ctx.NewAtom(node.Pos(), "delete")) .Priority(ctx.NewAtom(node.Pos(), ToString(priority))) + .TableType(ctx.NewAtom(node.Pos(), isOlap ? "olap" : "oltp")) .Settings() .Build() .Build() diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index 2e2143decdf5..0559c0ef2ec8 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -1140,6 +1140,7 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { if (const auto inconsistentWrite = settings.InconsistentWrite().Cast(); inconsistentWrite.StringValue() == "true") { settingsProto.SetInconsistentTx(true); } + settingsProto.SetIsOlap(settings.TableType().Cast().StringValue() == "olap"); if (settings.Mode().Cast().StringValue() == "replace") { settingsProto.SetType(NKikimrKqp::TKqpTableSinkSettings::MODE_REPLACE); diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 6feee5aef3a5..8a43b870eb70 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -893,6 +893,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { } void PassAway() override {; + CA_LOG_D("PassAway"); Send(PipeCacheId, new TEvPipeCache::TEvUnlink(0)); TActorBootstrapped::PassAway(); } @@ -1019,6 +1020,9 @@ class TKqpDirectWriteActor : public 
TActorBootstrapped, pu for (const auto& lock : WriteTableActor->GetLocks()) { resultInfo.AddLocks()->CopyFrom(lock); } + resultInfo.SetHasRead( + GetOperation(Settings.GetType()) == NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT || + GetOperation(Settings.GetType()) == NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE); google::protobuf::Any result; result.PackFrom(resultInfo); return result; @@ -1505,7 +1509,9 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub transaction.SetTxId(*TxId); transaction.SetMinStep(commitInfo.MinStep); transaction.SetMaxStep(commitInfo.MaxStep); - transaction.SetFlags(TEvTxProxy::TEvProposeTransaction::FlagVolatile); + if (TxManager->IsVolatile()) { + transaction.SetFlags(TEvTxProxy::TEvProposeTransaction::FlagVolatile); + } for (const auto& shardInfo : commitInfo.ShardsInfo) { auto& item = *affectedSet.Add(); @@ -1654,7 +1660,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub return issues; }; - CA_LOG_D("Recv EvWriteResult from ShardID=" << ev->Get()->Record.GetOrigin() + CA_LOG_D("Recv EvWriteResult (external) from ShardID=" << ev->Get()->Record.GetOrigin() << ", Status=" << NKikimrDataEvents::TEvWriteResult::EStatus_Name(ev->Get()->GetStatus()) << ", TxId=" << ev->Get()->Record.GetTxId() << ", Locks= " << [&]() { @@ -1856,7 +1862,7 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub if (State != EState::COMMITTING) { return; } - Y_UNUSED(shardId, dataSize); + Y_UNUSED(dataSize); if (TxManager->ConsumeCommitResult(shardId)) { CA_LOG_D("Committed"); State = EState::FINISHED; diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index df4ca7d943ab..d3c8dc38a5bf 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -3084,7 +3084,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { REPLACE INTO `/Root/DataShard` SELECT * FROM `/Root/ColumnShard`; SELECT * FROM `/Root/ColumnShard` 
ORDER BY Col1; SELECT * FROM `/Root/DataShard` ORDER BY Col1; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); CompareYson(R"([[1u;"test1";10];[2u;"test2";11];[3u;"test3";12];[4u;"test";13];[10u;"test1";10];[20u;"test2";11];[30u;"test3";12];[40u;"test";13]])", FormatResultSetYson(result.GetResultSet(0))); CompareYson(R"([[1u;"test1";10];[2u;"test2";11];[3u;"test3";12];[4u;"test";13];[10u;"test1";10];[20u;"test2";11];[30u;"test3";12];[40u;"test";13]])", FormatResultSetYson(result.GetResultSet(1))); diff --git a/ydb/core/protos/kqp.proto b/ydb/core/protos/kqp.proto index 990f74e650fd..21b9bf8bae1e 100644 --- a/ydb/core/protos/kqp.proto +++ b/ydb/core/protos/kqp.proto @@ -695,6 +695,7 @@ message TEvKillScanTablet { message TEvKqpOutputActorResultInfo { repeated NKikimrDataEvents.TLock Locks = 1; + optional bool HasRead = 2; } @@ -716,6 +717,7 @@ message TKqpTableSinkSettings { optional EType Type = 9; optional NActorsProto.TActorId BufferActorId = 10; optional int64 Priority = 11; + optional bool IsOlap = 12; } message TKqpStreamLookupSettings { From 8a283a02dcb6834608d9bf3e98a0b74e473224a5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Oct 2024 16:24:06 +0300 Subject: [PATCH 67/69] fix --- ydb/core/kqp/common/kqp_tx_manager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ydb/core/kqp/common/kqp_tx_manager.cpp b/ydb/core/kqp/common/kqp_tx_manager.cpp index b837714e04a7..82e280ec25dc 100644 --- a/ydb/core/kqp/common/kqp_tx_manager.cpp +++ b/ydb/core/kqp/common/kqp_tx_manager.cpp @@ -286,6 +286,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { TPrepareInfo GetPrepareTransactionInfo() override { AFL_ENSURE(State == ETransactionState::PREPARING); + AFL_ENSURE(!ReceivingShards.empty()); 
TPrepareInfo result { .SendingShards = SendingShards, @@ -332,7 +333,7 @@ class TKqpTransactionManager : public IKqpTransactionManager { shardInfo.State = EShardState::EXECUTING; } - AFL_ENSURE(ReceivingShards.empty() || !IsSingleShard()); + AFL_ENSURE(ReceivingShards.empty() || !IsSingleShard() || HasOlapTable()); } TCommitInfo GetCommitInfo() override { From 16e54261416a40452d3116bb4045465430848d87 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Oct 2024 20:31:57 +0300 Subject: [PATCH 68/69] fix --- .../kqp/query_compiler/kqp_query_compiler.cpp | 1 + ydb/core/kqp/runtime/kqp_write_actor.cpp | 12 ++++++---- ydb/core/kqp/runtime/kqp_write_table.cpp | 14 ++++++----- ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp | 24 +++++-------------- 4 files changed, 23 insertions(+), 28 deletions(-) diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index 0559c0ef2ec8..140df1d43ad2 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -1141,6 +1141,7 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { settingsProto.SetInconsistentTx(true); } settingsProto.SetIsOlap(settings.TableType().Cast().StringValue() == "olap"); + settingsProto.SetPriority(FromString(settings.Priority().Cast().StringValue())); if (settings.Mode().Cast().StringValue() == "replace") { settingsProto.SetType(NKikimrKqp::TKqpTableSinkSettings::MODE_REPLACE); diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 8a43b870eb70..8b052349e4ca 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -739,6 +739,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { void FlushBuffers() { ShardedWriteController->FlushBuffers(); + UpdateShards(); } void Flush() { @@ -808,7 +809,7 @@ class TKqpTableWriteActor : public TActorBootstrapped { return builder; }() << ", Size=" << 
serializationResult.TotalDataSize << ", Cookie=" << metadata->Cookie - << ", OperationsCount=" << metadata->OperationsCount << ", IsFinal=" << metadata->IsFinal + << ", OperationsCount=" << evWrite->Record.OperationsSize() << ", IsFinal=" << metadata->IsFinal << ", Attempts=" << metadata->SendAttempts << ", Mode=" << static_cast(Mode)); Send( PipeCacheId, @@ -1390,9 +1391,6 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub for (auto& [_, queue] : DataQueues) { YQL_ENSURE(queue.empty()); } - for (auto& [_, info] : WriteInfos) { - info.WriteTableActor->FlushBuffers(); - } Process(); } @@ -1628,11 +1626,17 @@ class TKqpBufferWriteActor :public TActorBootstrapped, pub void Handle(TEvKqpBuffer::TEvFlush::TPtr& ev) { ExecuterActorId = ev->Get()->ExecuterActorId; + for (auto& [_, info] : WriteInfos) { + info.WriteTableActor->FlushBuffers(); + } Flush(); } void Handle(TEvKqpBuffer::TEvCommit::TPtr& ev) { ExecuterActorId = ev->Get()->ExecuterActorId; + for (auto& [_, info] : WriteInfos) { + info.WriteTableActor->FlushBuffers(); + } if (TxManager->IsReadOnly()) { Rollback(); State = EState::FINISHED; diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 6f2709bc2c75..798cb724fe0e 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -1198,8 +1198,9 @@ class TShardedWriteController : public IShardedWriteController { const auto& writeInfo = WriteInfos.at(token); YQL_ENSURE(writeInfo.Closed); if (writeInfo.Metadata.Priority != 0) { - YQL_ENSURE(!writeInfo.Serializer->IsFinished()); - writeTokensFoFlush.push_back(token); + if (!writeInfo.Serializer->IsFinished()) { + writeTokensFoFlush.push_back(token); + } } else { YQL_ENSURE(writeInfo.Serializer->IsFinished()); } @@ -1274,12 +1275,13 @@ class TShardedWriteController : public IShardedWriteController { result.TotalDataSize += inFlightBatch.Data->GetMemory(); const ui64 payloadIndex = 
NKikimr::NEvWrite::TPayloadWriter(evWrite) .AddDataToPayload(inFlightBatch.Data->SerializeToString()); + const auto& writeInfo = WriteInfos.at(inFlightBatch.Token); evWrite.AddOperation( - WriteInfos.at(inFlightBatch.Token).Metadata.OperationType, - WriteInfos.at(inFlightBatch.Token).Metadata.TableId, - WriteInfos.at(inFlightBatch.Token).Serializer->GetWriteColumnIds(), + writeInfo.Metadata.OperationType, + writeInfo.Metadata.TableId, + writeInfo.Serializer->GetWriteColumnIds(), payloadIndex, - WriteInfos.at(inFlightBatch.Token).Serializer->GetDataFormat()); + writeInfo.Serializer->GetDataFormat()); } else { YQL_ENSURE(index + 1 == shardInfo.GetBatchesInFlight()); } diff --git a/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp index aec42d54998e..a780ea2f9260 100644 --- a/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp @@ -141,26 +141,14 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - if (!GetIsOlap()) { - result = session.ExecuteQuery(Q_(R"( - UPDATE `/Root/KV` SET Value = "third" WHERE Key = 4; - )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - - auto commitResult = tx->Commit().ExtractValueSync(); - - UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::ABORTED, commitResult.GetIssues().ToString()); - } else { - // Olap works without defer - result = session.ExecuteQuery(Q_(R"( - UPDATE `/Root/KV` SET Value = "third" WHERE Key = 4; - )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + result = session.ExecuteQuery(Q_(R"( + UPDATE `/Root/KV` SET Value = "third" WHERE Key = 4; + )"), 
TTxControl::Tx(tx->GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - auto commitResult = tx->Commit().ExtractValueSync(); + auto commitResult = tx->Commit().ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::NOT_FOUND, commitResult.GetIssues().ToString()); - } + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::ABORTED, commitResult.GetIssues().ToString()); } }; From 537db8e2387352837d7d23d47a877e84ff137fd3 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Oct 2024 22:37:39 +0300 Subject: [PATCH 69/69] fix --- ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index d3c8dc38a5bf..df4ca7d943ab 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -3084,7 +3084,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { REPLACE INTO `/Root/DataShard` SELECT * FROM `/Root/ColumnShard`; SELECT * FROM `/Root/ColumnShard` ORDER BY Col1; SELECT * FROM `/Root/DataShard` ORDER BY Col1; - )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), TExecuteQuerySettings().ClientTimeout(TDuration::MilliSeconds(1000))).ExtractValueSync(); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); CompareYson(R"([[1u;"test1";10];[2u;"test2";11];[3u;"test3";12];[4u;"test";13];[10u;"test1";10];[20u;"test2";11];[30u;"test3";12];[40u;"test";13]])", FormatResultSetYson(result.GetResultSet(0))); CompareYson(R"([[1u;"test1";10];[2u;"test2";11];[3u;"test3";12];[4u;"test";13];[10u;"test1";10];[20u;"test2";11];[30u;"test3";12];[40u;"test";13]])", FormatResultSetYson(result.GetResultSet(1)));