From 38c4c9ce46a1ee1911cfc034181b52282947cdca Mon Sep 17 00:00:00 2001 From: Ilia Shakhov Date: Thu, 28 Dec 2023 14:41:21 +0000 Subject: [PATCH] Collect stats during lifetime KIKIMR-19289 --- ydb/core/mind/hive/hive_impl.cpp | 19 ++++++++- ydb/core/mind/hive/hive_impl.h | 5 +++ ydb/core/mind/hive/leader_tablet_info.cpp | 19 +++++++++ ydb/core/mind/hive/monitoring.cpp | 49 +++++----------------- ydb/core/mind/hive/node_info.cpp | 2 + ydb/core/mind/hive/tx__load_everything.cpp | 4 +- 6 files changed, 57 insertions(+), 41 deletions(-) diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp index 98f031b6ef1e..fb819befe4a9 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -1520,7 +1520,9 @@ void THive::DeleteTablet(TTabletId tabletId) { } Y_ENSURE_LOG(nt->second.LockedTablets.count(&tablet) == 0, " Deleting tablet found on node " << nt->first << " in locked set"); } - UpdateCounterTabletsTotal(-1 - (tablet.Followers.size())); + const i64 tabletsTotalDiff = -1 - (tablet.Followers.size()); + UpdateCounterTabletsTotal(tabletsTotalDiff); + UpdateTabletsTotalByDomain(tabletsTotalDiff, tablet.ObjectDomain); Tablets.erase(it); } } @@ -1553,6 +1555,21 @@ void THive::KillNode(TNodeId nodeId, const TActorId& local) { Execute(CreateKillNode(nodeId, local)); } +void THive::UpdateTabletsTotalByDomain(i64 tabletsTotalDiff, const TSubDomainKey& objectDomain) { + if (objectDomain) { + TabletsTotalByDomain[objectDomain] += tabletsTotalDiff; + } +} + +void THive::UpdateTabletsAliveByDomain(i64 tabletsAliveDiff, const TSubDomainKey& objectDomain, const TSubDomainKey& tabletNodeDomain) { + if (objectDomain) { + TabletsAliveByDomain[objectDomain] += tabletsAliveDiff; + if (objectDomain == tabletNodeDomain) { + TabletsAliveInObjectDomainByDomain[objectDomain] += tabletsAliveDiff; + } + } +} + void THive::SetCounterTabletsTotal(ui64 tabletsTotal) { if (TabletCounters != nullptr) { auto& counter = TabletCounters->Simple()[NHive::COUNTER_TABLETS_TOTAL]; diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index e3318ad1e42f..74e7f9b74d3a 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -323,6 +323,9 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar ui32 DataCenters = 1; ui32 RegisteredDataCenters = 1; TObjectDistributions ObjectDistributions; + std::unordered_map TabletsTotalByDomain; + std::unordered_map TabletsAliveByDomain; + std::unordered_map TabletsAliveInObjectDomainByDomain; bool AreWeRootHive() const { return RootHiveId == HiveId; } bool AreWeSubDomainHive() const { return RootHiveId != HiveId; } @@ -628,6 +631,8 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar TTabletCategoryInfo& GetTabletCategory(TTabletCategoryId tabletCategoryId); void KillNode(TNodeId nodeId, const TActorId& local); void AddToBootQueue(TTabletInfo* tablet); + void UpdateTabletsTotalByDomain(i64 tabletsTotalDiff, const TSubDomainKey& objectDomain); + void UpdateTabletsAliveByDomain(i64 tabletsAliveDiff, const TSubDomainKey& objectDomain, const TSubDomainKey& tabletNodeDomain); void SetCounterTabletsTotal(ui64 tabletsTotal); void UpdateCounterTabletsTotal(i64 tabletsTotalDiff); void UpdateCounterTabletsAlive(i64 tabletsAliveDiff); diff --git a/ydb/core/mind/hive/leader_tablet_info.cpp b/ydb/core/mind/hive/leader_tablet_info.cpp index 04a1892eb570..5ef3eb8e69d7 100644 --- a/ydb/core/mind/hive/leader_tablet_info.cpp +++ b/ydb/core/mind/hive/leader_tablet_info.cpp @@ -63,6 +63,8 @@ TFollowerId TLeaderTabletInfo::GetFollowerPromotableOnNode(TNodeId nodeId) const } void TLeaderTabletInfo::AssignDomains(const TSubDomainKey& objectDomain, const TVector& allowedDomains) { + const TSubDomainKey oldObjectDomain = ObjectDomain; + if (!allowedDomains.empty()) { NodeFilter.AllowedDomains = allowedDomains; if (!objectDomain) { @@ -80,6 +82,22 @@ void TLeaderTabletInfo::AssignDomains(const TSubDomainKey& objectDomain, const T for (auto& followerGroup : FollowerGroups) { followerGroup.NodeFilter.AllowedDomains = NodeFilter.AllowedDomains; } + + const ui64 leaderAndFollowers = 1 + Followers.size(); + Hive.UpdateTabletsTotalByDomain(-leaderAndFollowers, oldObjectDomain); + Hive.UpdateTabletsTotalByDomain(+leaderAndFollowers, ObjectDomain); + + if (IsAlive()) { + Hive.UpdateTabletsAliveByDomain(-1, oldObjectDomain, Node->GetServicedDomain()); + Hive.UpdateTabletsAliveByDomain(+1, ObjectDomain, Node->GetServicedDomain()); + } + + for (const auto& follower : Followers) { + if (follower.IsAlive()) { + Hive.UpdateTabletsAliveByDomain(-1, oldObjectDomain, follower.Node->GetServicedDomain()); + Hive.UpdateTabletsAliveByDomain(+1, ObjectDomain, follower.Node->GetServicedDomain()); + } + } } bool TLeaderTabletInfo::InitiateAssignTabletGroups() { @@ -125,6 +143,7 @@ TFollowerTabletInfo& TLeaderTabletInfo::AddFollower(TFollowerGroup& followerGrou follower.Id = followerId; } Hive.UpdateCounterTabletsTotal(+1); + Hive.UpdateTabletsTotalByDomain(+1, ObjectDomain); return follower; } diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp index b5872b9d2b4b..a69bf874d128 100644 --- a/ydb/core/mind/hive/monitoring.cpp +++ b/ydb/core/mind/hive/monitoring.cpp @@ -456,37 +456,6 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase { } void RenderHTMLPage(IOutputStream &out) { - THashMap tabletsRunningInObjectDomain; - THashMap tabletsRunningInOtherDomains; - THashMap tabletsTotal; - - for (const auto& [_, tablet] : Self->Tablets) { - const TSubDomainKey objectDomain = tablet.ObjectDomain; - ++tabletsTotal[objectDomain]; - - const TNodeInfo* node = tablet.GetNode(); - if (node) { - if (node->GetServicedDomain() == objectDomain) { - ++tabletsRunningInObjectDomain[objectDomain]; - } else { - ++tabletsRunningInOtherDomains[objectDomain]; - } - } - - for (const auto& follower : tablet.Followers) { - ++tabletsTotal[objectDomain]; - - const TNodeInfo* followerNode = follower.GetNode(); - if (followerNode) { - if (followerNode->GetServicedDomain() == objectDomain) { - ++tabletsRunningInObjectDomain[objectDomain]; - } else { - ++tabletsRunningInOtherDomains[objectDomain]; - } - } - } - } - // out << ""; out << ""; out << ""; @@ -495,8 +464,8 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase { out << ""; out << ""; out << ""; - out << ""; - out << ""; + out << ""; + out << ""; out << ""; out << ""; out << ""; @@ -521,16 +490,18 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase { out << ""; out << ""; } - if (tabletsTotal[domainKey] > 0) { - out << ""; - out << ""; + if (Self->TabletsTotalByDomain[domainKey] > 0) { + out << ""; + + const ui64 tabletsAliveInOtherDomains = Self->TabletsAliveByDomain[domainKey] - Self->TabletsAliveInObjectDomainByDomain[domainKey]; + out << ""; } else { out << ""; out << ""; } - out << ""; + out << ""; out << ""; } out << ""; diff --git a/ydb/core/mind/hive/node_info.cpp b/ydb/core/mind/hive/node_info.cpp index 77d726eaa3ef..a6394ccbf1e3 100644 --- a/ydb/core/mind/hive/node_info.cpp +++ b/ydb/core/mind/hive/node_info.cpp @@ -69,6 +69,7 @@ bool TNodeInfo::OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EV TabletsRunningByType[tablet->GetTabletType()].erase(tablet); TabletsOfObject[tablet->GetObjectId()].erase(tablet); Hive.UpdateCounterTabletsAlive(-1); + Hive.UpdateTabletsAliveByDomain(-1, tablet->GetLeader().ObjectDomain, GetServicedDomain()); if (tablet->HasCounter() && tablet->IsLeader()) { Hive.UpdateObjectCount(tablet->AsLeader(), *this, -1); } @@ -84,6 +85,7 @@ bool TNodeInfo::OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EV TabletsRunningByType[tablet->GetTabletType()].emplace(tablet); TabletsOfObject[tablet->GetObjectId()].emplace(tablet); Hive.UpdateCounterTabletsAlive(+1); + Hive.UpdateTabletsAliveByDomain(+1, tablet->GetLeader().ObjectDomain, GetServicedDomain()); if (tablet->HasCounter() && tablet->IsLeader()) { Hive.UpdateObjectCount(tablet->AsLeader(), *this, +1); } diff --git a/ydb/core/mind/hive/tx__load_everything.cpp b/ydb/core/mind/hive/tx__load_everything.cpp index 73525a3752bd..5fba082c99b2 100644 --- a/ydb/core/mind/hive/tx__load_everything.cpp +++ b/ydb/core/mind/hive/tx__load_everything.cpp @@ -706,11 +706,13 @@ class TTxLoadEverything : public TTransactionBase { void Complete(const TActorContext& ctx) override { BLOG_NOTICE("THive::TTxLoadEverything::Complete " << Self->DatabaseConfig.ShortDebugString()); - i64 tabletsTotal = 0; + ui64 tabletsTotal = 0; for (auto it = Self->Tablets.begin(); it != Self->Tablets.end(); ++it) { ++tabletsTotal; + Self->UpdateTabletsTotalByDomain(+1, it->second.ObjectDomain); for (const TTabletInfo& follower : it->second.Followers) { ++tabletsTotal; + Self->UpdateTabletsTotalByDomain(+1, it->second.ObjectDomain); if (follower.IsLeader()) { follower.AsLeader(); } else {
NameHiveStatusTabletsRunningInTenantDomainTabletsRunningInOtherDomainsTabletsAliveInTenantDomainTabletsAliveInOtherDomainsTabletsTotal
--" << std::round(tabletsRunningInObjectDomain[domainKey] * 100.0 / tabletsTotal[domainKey]) << "%" - << " (" << tabletsRunningInObjectDomain[domainKey] << " of " << tabletsTotal[domainKey] << ")" << "" << std::round(tabletsRunningInOtherDomains[domainKey] * 100.0 / tabletsTotal[domainKey]) << "%" - << " (" << tabletsRunningInOtherDomains[domainKey] << " of " << tabletsTotal[domainKey] << ")" << "" << std::round(Self->TabletsAliveInObjectDomainByDomain[domainKey] * 100.0 / Self->TabletsTotalByDomain[domainKey]) << "%" + << " (" << Self->TabletsAliveInObjectDomainByDomain[domainKey] << " of " << Self->TabletsTotalByDomain[domainKey] << ")" << "" << std::round(tabletsAliveInOtherDomains * 100.0 / Self->TabletsTotalByDomain[domainKey]) << "%" + << " (" << tabletsAliveInOtherDomains << " of " << Self->TabletsTotalByDomain[domainKey] << ")" << "--" << tabletsTotal[domainKey] << "" << Self->TabletsTotalByDomain[domainKey] << "