Skip to content

Commit

Permalink
add missing monitoring metrics for column shards (#7314)
Browse files Browse the repository at this point in the history
  • Loading branch information
swalrus1 authored Aug 7, 2024
1 parent 437bf85 commit 7ad9e1e
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 15 deletions.
6 changes: 4 additions & 2 deletions ydb/core/protos/counters_columnshard.proto
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ enum ECumulativeCounters {
COUNTER_PLAN_STEP_ACCEPTED = 9 [(CounterOpts) = {Name: "PlanStepAccepted"}];
COUNTER_SCANNED_ROWS = 10 [(CounterOpts) = {Name: "ScannedRows"}];
COUNTER_SCANNED_BYTES = 11 [(CounterOpts) = {Name: "ScannedBytes"}];
COUNTER_UPSERT_BLOBS_WRITTEN = 12 [(CounterOpts) = {Name: "UpsertBlobsWritten"}];
COUNTER_UPSERT_BYTES_WRITTEN = 13 [(CounterOpts) = {Name: "UpsertBytesWritten"}];
COUNTER_OPERATIONS_BLOBS_WRITTEN = 12 [(CounterOpts) = {Name: "OperationsBlobsWritten"}];
COUNTER_OPERATIONS_BYTES_WRITTEN = 13 [(CounterOpts) = {Name: "OperationsBytesWritten"}];
COUNTER_INDEXING_BLOBS_WRITTEN = 14 [(CounterOpts) = {Name: "IndexingBlobsWritten"}];
COUNTER_INDEXING_BYTES_WRITTEN = 15 [(CounterOpts) = {Name: "IndexingBytesWritten"}];
COUNTER_COMPACTION_BLOBS_WRITTEN = 16 [(CounterOpts) = {Name: "CompactionBlobsWritten"}];
Expand Down Expand Up @@ -137,6 +137,8 @@ enum ECumulativeCounters {
COUNTER_READING_EXPORTED_RANGES = 81 [(CounterOpts) = {Name: "ReadingExportedRanges"}];
COUNTER_PLANNED_TX_COMPLETED = 82 [(CounterOpts) = {Name: "PlannedTxCompleted"}];
COUNTER_IMMEDIATE_TX_COMPLETED = 83 [(CounterOpts) = {Name: "ImmediateTxCompleted"}];
COUNTER_ROWS_ERASED = 84 [(CounterOpts) = {Name: "RowsErased"}];
COUNTER_OPERATIONS_ROWS_WRITTEN = 85 [(CounterOpts) = {Name: "OperationsRowsWritten"}];
}

enum EPercentileCounters {
Expand Down
49 changes: 43 additions & 6 deletions ydb/core/tablet/tablet_counters_aggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,10 +765,16 @@ class TTabletMon {
TCounterPtr DatashardSizeBytes;
TCounterPtr DatashardCacheHitBytes;
TCounterPtr DatashardCacheMissBytes;
TCounterPtr ColumnShardReadRows_;
TCounterPtr ColumnShardReadBytes_;
TCounterPtr ColumnShardScanRows_;
TCounterPtr ColumnShardScanBytes_;
TCounterPtr ColumnShardWriteRows_;
TCounterPtr ColumnShardWriteBytes_;
TCounterPtr ColumnShardBulkUpsertRows_;
TCounterPtr ColumnShardBulkUpsertBytes_;
TCounterPtr ColumnShardEraseRows_;
TCounterPtr ColumnShardEraseBytes_;
TCounterPtr ResourcesStorageUsedBytes;
TCounterPtr ResourcesStorageUsedBytesOnSsd;
TCounterPtr ResourcesStorageUsedBytesOnHdd;
Expand All @@ -787,6 +793,7 @@ class TTabletMon {
TCounterPtr ResourcesStreamReservedStorageLimit;

THistogramPtr ShardCpuUtilization;
THistogramPtr ColumnShardCpuUtilization;

TCounterPtr RowUpdates;
TCounterPtr RowUpdateBytes;
Expand All @@ -808,8 +815,11 @@ class TTabletMon {

TCounterPtr ColumnShardScannedBytes_;
TCounterPtr ColumnShardScannedRows_;
TCounterPtr ColumnShardUpsertBlobsWritten_;
TCounterPtr ColumnShardUpsertBytesWritten_;
TCounterPtr ColumnShardOperationsRowsWritten_;
TCounterPtr ColumnShardOperationsBytesWritten_;
TCounterPtr ColumnShardErasedBytes_;
TCounterPtr ColumnShardErasedRows_;
THistogramPtr ColumnShardConsumedCpuHistogram;

TCounterPtr DiskSpaceTablesTotalBytes;
TCounterPtr DiskSpaceTablesTotalBytesOnSsd;
Expand Down Expand Up @@ -859,14 +869,26 @@ class TTabletMon {
DatashardCacheMissBytes = ydbGroup->GetNamedCounter("name",
"table.datashard.cache_miss.bytes", true);

ColumnShardReadRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.read.rows", true);
ColumnShardReadBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.read.bytes", true);
ColumnShardScanRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.scan.rows", true);
ColumnShardScanBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.scan.bytes", true);
ColumnShardWriteRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.write.rows", true);
ColumnShardWriteBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.write.bytes", true);
ColumnShardBulkUpsertRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.bulk_upsert.rows", true);
ColumnShardBulkUpsertBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.bulk_upsert.bytes", true);
ColumnShardEraseRows_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.erase.rows", true);
ColumnShardEraseBytes_ = ydbGroup->GetNamedCounter("name",
"table.columnshard.erase.bytes", true);

ResourcesStorageUsedBytes = ydbGroup->GetNamedCounter("name",
"resources.storage.used_bytes", false);
Expand Down Expand Up @@ -908,6 +930,8 @@ class TTabletMon {

ShardCpuUtilization = ydbGroup->GetNamedHistogram("name",
"table.datashard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false);
ColumnShardCpuUtilization = ydbGroup->GetNamedHistogram("name",
"table.columnshard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false);
};

void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard, bool hasColumnShard) {
Expand Down Expand Up @@ -943,8 +967,11 @@ class TTabletMon {

ColumnShardScannedBytes_ = appGroup->GetCounter("ColumnShard/ScannedBytes");
ColumnShardScannedRows_ = appGroup->GetCounter("ColumnShard/ScannedRows");
ColumnShardUpsertBlobsWritten_ = appGroup->GetCounter("ColumnShard/UpsertBlobsWritten");
ColumnShardUpsertBytesWritten_ = appGroup->GetCounter("ColumnShard/UpsertBytesWritten");
ColumnShardOperationsRowsWritten_ = appGroup->GetCounter("ColumnShard/OperationsRowsWritten");
ColumnShardOperationsBytesWritten_ = appGroup->GetCounter("ColumnShard/OperationsBytesWritten");
ColumnShardErasedBytes_ = appGroup->GetCounter("ColumnShard/BytesErased");
ColumnShardErasedRows_ = appGroup->GetCounter("ColumnShard/RowsErased");
ColumnShardConsumedCpuHistogram = appGroup->FindHistogram("HIST(ConsumedCPU)");
}

if (hasSchemeshard && !DiskSpaceTablesTotalBytes) {
Expand Down Expand Up @@ -990,10 +1017,20 @@ class TTabletMon {
}

if (ColumnShardScannedBytes_) {
ColumnShardReadRows_->Set(0);
ColumnShardReadBytes_->Set(0);
ColumnShardScanRows_->Set(ColumnShardScannedRows_->Val());
ColumnShardScanBytes_->Set(ColumnShardScannedBytes_->Val());
ColumnShardBulkUpsertRows_->Set(ColumnShardUpsertBlobsWritten_->Val());
ColumnShardBulkUpsertBytes_->Set(ColumnShardUpsertBytesWritten_->Val());
ColumnShardWriteRows_->Set(ColumnShardOperationsRowsWritten_->Val());
ColumnShardWriteBytes_->Set(ColumnShardOperationsBytesWritten_->Val());
ColumnShardBulkUpsertRows_->Set(ColumnShardOperationsRowsWritten_->Val());
ColumnShardBulkUpsertBytes_->Set(ColumnShardOperationsBytesWritten_->Val());
ColumnShardEraseRows_->Set(ColumnShardErasedRows_->Val());
ColumnShardEraseBytes_->Set(ColumnShardErasedBytes_->Val());

if (ColumnShardConsumedCpuHistogram) {
TransferBuckets(ColumnShardCpuUtilization, ColumnShardConsumedCpuHistogram);
}
}

if (DiskSpaceTablesTotalBytes) {
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/tx/columnshard/columnshard__write.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo
wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator());
} else {
const TMonotonic now = TMonotonic::Now();
Counters.GetCSCounters().OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant());
Counters.OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant(), aggr->GetRows());
Counters.GetCSCounters().OnWriteMiddle1PutBlobsSuccess(now - writeMeta.GetWriteMiddle1StartInstant());
Counters.GetCSCounters().OnWriteMiddle2PutBlobsSuccess(now - writeMeta.GetWriteMiddle2StartInstant());
Counters.GetCSCounters().OnWriteMiddle3PutBlobsSuccess(now - writeMeta.GetWriteMiddle3StartInstant());
Expand Down
5 changes: 5 additions & 0 deletions ydb/core/tx/columnshard/counters/counters_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ class TCountersManager {
BackgroundControllerCounters->FillTotalStats(tableStats);
ScanCounters.FillStats(tableStats);
}

// Fans a single "put blobs succeeded" notification out to both counter sets.
//   d           - duration forwarded to CSCounters.OnWritePutBlobsSuccess.
//   rowsWritten - row count forwarded to TabletCounters->OnWritePutBlobsSuccess.
void OnWritePutBlobsSuccess(const TDuration d, const ui64 rowsWritten) const {
TabletCounters->OnWritePutBlobsSuccess(rowsWritten);
CSCounters.OnWritePutBlobsSuccess(d);
}
};

} // namespace NKikimr::NColumnShard
19 changes: 14 additions & 5 deletions ydb/core/tx/columnshard/counters/tablet_counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,8 @@ class TTabletCountersHandle {
}

// Accounts for one successful write.
//   blobsWritten - amount passed to IncCounter for COUNTER_OPERATIONS_BLOBS_WRITTEN.
//   bytesWritten - amount passed to IncCounter for COUNTER_OPERATIONS_BYTES_WRITTEN.
// COUNTER_WRITE_SUCCESS is bumped once per call (IncCounter without an explicit amount).
void OnWriteSuccess(const ui64 blobsWritten, const ui64 bytesWritten) const {
    // Each quantity is reported under exactly one counter name, so a write is never counted twice.
    IncCounter(NColumnShard::COUNTER_OPERATIONS_BLOBS_WRITTEN, blobsWritten);
    IncCounter(NColumnShard::COUNTER_OPERATIONS_BYTES_WRITTEN, bytesWritten);
    IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS);
}

Expand Down Expand Up @@ -106,9 +105,19 @@ class TTabletCountersHandle {
IncCounter(NColumnShard::COUNTER_INDEXING_TIME, duration.MilliSeconds());
}

// Passes rowsWritten to IncCounter for the cumulative counter
// COUNTER_OPERATIONS_ROWS_WRITTEN ("OperationsRowsWritten").
void OnWritePutBlobsSuccess(const ui64 rowsWritten) const {
IncCounter(NColumnShard::COUNTER_OPERATIONS_ROWS_WRITTEN, rowsWritten);
}

// Reports the size of one dropped portion to the erase counters via IncCounter:
//   rawBytes  -> COUNTER_RAW_BYTES_ERASED
//   blobBytes -> COUNTER_BYTES_ERASED
//   rows      -> COUNTER_ROWS_ERASED
void OnDropPortionEvent(const ui64 rawBytes, const ui64 blobBytes, const ui64 rows) const {
IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, rawBytes);
IncCounter(NColumnShard::COUNTER_BYTES_ERASED, blobBytes);
IncCounter(NColumnShard::COUNTER_ROWS_ERASED, rows);
}

void FillStats(::NKikimrTableStats::TTableStats& output) const {
output.SetRowUpdates(GetValue(COUNTER_WRITE_SUCCESS));
output.SetRowDeletes(0); // manual deletes are not supported
output.SetRowUpdates(GetValue(COUNTER_OPERATIONS_ROWS_WRITTEN));
output.SetRowDeletes(GetValue(COUNTER_ROWS_ERASED));
output.SetRowReads(0); // all reads are range reads
output.SetRangeReadRows(GetValue(COUNTER_READ_INDEX_ROWS));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::T
if (self) {
self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size());
for (auto&& p : PortionsToDrop) {
self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, p.GetTotalRawBytes());
self->Counters.GetTabletCounters()->OnDropPortionEvent(p.GetTotalRawBytes(), p.GetTotalBlobBytes(), p.NumRows());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class TWriteAggregation {
NEvWrite::TWriteMeta WriteMeta;
YDB_READONLY(ui64, SchemaVersion, 0);
YDB_READONLY(ui64, Size, 0);
YDB_READONLY(ui64, Rows, 0);
YDB_ACCESSOR_DEF(std::vector<TWideSerializedBatch>, SplittedBlobs);
YDB_READONLY_DEF(TVector<TWriteId>, WriteIds);
YDB_READONLY_DEF(std::shared_ptr<NOlap::IBlobsWritingAction>, BlobsAction);
Expand Down Expand Up @@ -117,6 +118,9 @@ class TWriteAggregation {
for (auto&& s : splittedBlobs) {
SplittedBlobs.emplace_back(std::move(s), *this);
}
for (const auto& batch : SplittedBlobs) {
Rows += batch->GetRowsCount();
}
}

TWriteAggregation(const NEvWrite::TWriteData& writeData)
Expand Down

0 comments on commit 7ad9e1e

Please sign in to comment.