From 2020867a79bf676f613a16633bd84cf1172272fc Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 9 Dec 2024 08:51:22 +0300 Subject: [PATCH] split logging services through huge messages volume in summary logs (#12382) --- ydb/core/tx/columnshard/columnshard.cpp | 2 +- .../tx/columnshard/columnshard__write.cpp | 22 +++++++++---------- .../changes/compaction/sparsed/logic.cpp | 1 + .../tx/columnshard/engines/column_engine.cpp | 6 ++--- .../engines/column_engine_logs.cpp | 4 ++-- ydb/core/tx/data_events/events.h | 2 +- ydb/library/services/services.proto | 2 ++ 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp index 117efd69d2e7..7567e520b9fb 100644 --- a/ydb/core/tx/columnshard/columnshard.cpp +++ b/ydb/core/tx/columnshard/columnshard.cpp @@ -411,7 +411,7 @@ void TColumnShard::SendPeriodicStats() { const TInstant now = TAppData::TimeProvider->Now(); if (LastStatsReport + StatsReportInterval > now) { - LOG_S_TRACE("Skip send periodic stats: report interavl = " << StatsReportInterval); + LOG_S_TRACE("Skip send periodic stats: report interval = " << StatsReportInterval); return; } LastStatsReport = now; diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp b/ydb/core/tx/columnshard/columnshard__write.cpp index abc5b19061f0..06730fa9cdbf 100644 --- a/ydb/core/tx/columnshard/columnshard__write.cpp +++ b/ydb/core/tx/columnshard/columnshard__write.cpp @@ -46,7 +46,7 @@ void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const Y_ABORT("invalid function usage"); } - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "write_overload")("size", writeSize)("path_id", writeMeta.GetTableId())( + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "write_overload")("size", writeSize)("path_id", writeMeta.GetTableId())( "reason", overloadReason); ctx.Send(writeMeta.GetSource(), event.release(), 0, cookie); @@ -70,17 +70,17 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 pathId) c ui64 writesLimit = Settings.OverloadWritesInFlight; ui64 writesSizeLimit = Settings.OverloadWritesSizeInFlight; if (txLimit && Executor()->GetStats().TxInFly > txLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "tx_in_fly")("sum", Executor()->GetStats().TxInFly)( + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "shard_overload")("reason", "tx_in_fly")("sum", Executor()->GetStats().TxInFly)( "limit", txLimit); return EOverloadStatus::ShardTxInFly; } if (writesLimit && Counters.GetWritesMonitor()->GetWritesInFlight() > writesLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_in_fly")( + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "shard_overload")("reason", "writes_in_fly")( "sum", Counters.GetWritesMonitor()->GetWritesInFlight())("limit", writesLimit); return EOverloadStatus::ShardWritesInFly; } if (writesSizeLimit && Counters.GetWritesMonitor()->GetWritesSizeInFlight() > writesSizeLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_size_in_fly")( + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "shard_overload")("reason", "writes_size_in_fly")( "sum", Counters.GetWritesMonitor()->GetWritesSizeInFlight())("limit", writesSizeLimit); return EOverloadStatus::ShardWritesSizeInFly; } @@ -90,7 +90,7 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 pathId) c void TColumnShard::Handle(NPrivateEvents::NWrite::TEvWritePortionResult::TPtr& ev, const TActorContext& ctx) { TMemoryProfileGuard mpg("TEvWritePortionResult"); NActors::TLogContextGuard gLogging = - NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWritePortionResult"); + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_WRITE)("tablet_id", TabletID())("event", "TEvWritePortionResult"); std::vector noDataWrites = ev->Get()->DetachNoDataWrites(); for (auto&& i : noDataWrites) { Counters.GetWritesMonitor()->OnFinishWrite(i.GetDataSize(), 1); @@ -123,7 +123,7 @@ void TColumnShard::Handle(NPrivateEvents::NWrite::TEvWritePortionResult::TPtr& e void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActorContext& ctx) { NActors::TLogContextGuard gLogging = - NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWriteBlobsResult"); + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_WRITE)("tablet_id", TabletID())("event", "TEvWriteBlobsResult"); auto& putResult = ev->Get()->GetPutResult(); OnYellowChannels(putResult); @@ -228,7 +228,7 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex }; if (!AppDataVerified().ColumnShardConfig.GetWritingEnabled()) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_writing")("reason", "disabled"); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "skip_writing")("reason", "disabled"); return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::Disabled); } @@ -245,7 +245,7 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex { const ui64 minMemoryRead = portionsIndex.GetMinRawMemoryRead(); if (NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit < minMemoryRead) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "overlimit")("reason", "read_raw_memory")("current", minMemoryRead)( + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "overlimit")("reason", "read_raw_memory")("current", minMemoryRead)( "limit", NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit)("table_id", writeMeta.GetTableId()); return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::OverlimitReadRawMemory); } @@ -254,7 +254,7 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex { const ui64 minMemoryRead = portionsIndex.GetMinBlobMemoryRead(); if (NOlap::TGlobalLimits::DefaultBlobsMemoryIntervalLimit < minMemoryRead) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "overlimit")("reason", "read_blob_memory")("current", minMemoryRead)( + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_WRITE)("event", "overlimit")("reason", "read_blob_memory")("current", minMemoryRead)( "limit", NOlap::TGlobalLimits::DefaultBlobsMemoryIntervalLimit)("table_id", writeMeta.GetTableId()); return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::OverlimitReadBlobMemory); } @@ -466,13 +466,13 @@ class TAbortWriteTransaction: public NTabletFlatExecutor::TTransactionBaseGet()->Record; const auto source = ev->Sender; const auto cookie = ev->Cookie; const auto behaviourConclusion = TOperationsManager::GetBehaviour(*ev->Get()); - // AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("ev_write", record.DebugString()); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_WRITE)("ev_write", record.DebugString()); if (behaviourConclusion.IsFail()) { Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp index d2c4e14f0664..ddcb51e4104c 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp @@ -160,6 +160,7 @@ void TSparsedMerger::TCursor::InitArrays(const ui32 position) { SparsedCursor = std::make_shared(sparsedArray, &*CurrentOwnedArray); PlainCursor = nullptr; } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_COMPACTION)("event", "plain_merger"); PlainCursor = make_shared(CurrentOwnedArray->GetArray(), &*CurrentOwnedArray); SparsedCursor = nullptr; } diff --git a/ydb/core/tx/columnshard/engines/column_engine.cpp b/ydb/core/tx/columnshard/engines/column_engine.cpp index 51c2bc6cf2f2..d21354297ee1 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine.cpp @@ -17,14 +17,14 @@ const std::shared_ptr& IColumnEngine::GetReplaceKey() const { ui64 IColumnEngine::GetMetadataLimit() { static const ui64 MemoryTotal = NSystemInfo::TotalMemorySize(); if (!HasAppData()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("total", MemoryTotal); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_WRITE)("total", MemoryTotal); return MemoryTotal * 0.3; } else if (AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().HasAbsoluteValue()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)( + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_WRITE)( "value", AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().GetAbsoluteValue()); return AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().GetAbsoluteValue(); } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("total", MemoryTotal)( + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_WRITE)("total", MemoryTotal)( "kff", AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().GetTotalRatio()); return MemoryTotal * AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().GetTotalRatio(); } diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 566db5b6f5cb..518513b5635d 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -437,9 +437,9 @@ std::vector> TColumnEngineForLogs::Star AFL_WARN(NKikimrServices::TX_COLUMNSHARD_ACTUALIZATION)("event", "StartTtl")("skip", "not_ready_tiers"); } std::vector> result; - AFL_WARN(NKikimrServices::TX_COLUMNSHARD_ACTUALIZATION)("event", "StartTtl")("rw_tasks_count", context.GetTasks().size()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_ACTUALIZATION)("event", "StartTtl")("rw_tasks_count", context.GetTasks().size()); for (auto&& i : context.GetTasks()) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD_ACTUALIZATION)("event", "StartTtl")("rw", i.first.DebugString())("count", i.second.size()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_ACTUALIZATION)("event", "StartTtl")("rw", i.first.DebugString())("count", i.second.size()); for (auto&& t : i.second) { SignalCounters.OnActualizationTask(t.GetTask()->GetPortionsToEvictCount(), t.GetTask()->GetPortionsToRemoveSize()); result.emplace_back(t.GetTask()); diff --git a/ydb/core/tx/data_events/events.h b/ydb/core/tx/data_events/events.h index f4b190eaff82..87279ec784f0 100644 --- a/ydb/core/tx/data_events/events.h +++ b/ydb/core/tx/data_events/events.h @@ -96,7 +96,7 @@ struct TDataEvents { static std::unique_ptr BuildError(const ui64 origin, const ui64 txId, const NKikimrDataEvents::TEvWriteResult::EStatus& status, const TString& errorMsg) { auto result = std::make_unique(); - ACFL_ERROR("event", "ev_write_error")("status", NKikimrDataEvents::TEvWriteResult::EStatus_Name(status))("details", errorMsg)("tx_id", txId); + ACFL_WARN("event", "ev_write_error")("status", NKikimrDataEvents::TEvWriteResult::EStatus_Name(status))("details", errorMsg)("tx_id", txId); result->Record.SetOrigin(origin); result->Record.SetTxId(txId); result->Record.SetStatus(status); diff --git a/ydb/library/services/services.proto b/ydb/library/services/services.proto index 466afc701ada..76cf699b2da7 100644 --- a/ydb/library/services/services.proto +++ b/ydb/library/services/services.proto @@ -299,6 +299,8 @@ enum EServiceKikimr { CONTINUOUS_BACKUP = 804; TX_COLUMNSHARD_ACTUALIZATION = 850; + TX_COLUMNSHARD_COMPACTION = 851; + TX_COLUMNSHARD_WRITE = 852; // System views SYSTEM_VIEWS = 900;