Skip to content

Commit

Permalink
Collect stats during lifetime KIKIMR-19289
Browse files Browse the repository at this point in the history
  • Loading branch information
pixcc committed Dec 28, 2023
1 parent 7652655 commit 38c4c9c
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 41 deletions.
19 changes: 18 additions & 1 deletion ydb/core/mind/hive/hive_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1520,7 +1520,9 @@ void THive::DeleteTablet(TTabletId tabletId) {
}
Y_ENSURE_LOG(nt->second.LockedTablets.count(&tablet) == 0, " Deleting tablet found on node " << nt->first << " in locked set");
}
UpdateCounterTabletsTotal(-1 - (tablet.Followers.size()));
const i64 tabletsTotalDiff = -1 - (tablet.Followers.size());
UpdateCounterTabletsTotal(tabletsTotalDiff);
UpdateTabletsTotalByDomain(tabletsTotalDiff, tablet.ObjectDomain);
Tablets.erase(it);
}
}
Expand Down Expand Up @@ -1553,6 +1555,21 @@ void THive::KillNode(TNodeId nodeId, const TActorId& local) {
Execute(CreateKillNode(nodeId, local));
}

// Adjusts the per-domain "total tablets" counter for `objectDomain` by
// `tabletsTotalDiff` (positive to add tablets, negative to remove them).
//
// Calls with a key that evaluates to false are ignored, so no entry is
// created for such a key.
// NOTE(review): the counter map is declared with ui64 values in hive_impl.h,
// so a negative diff is applied through unsigned arithmetic; this presumably
// relies on callers never removing more tablets than were added - confirm.
void THive::UpdateTabletsTotalByDomain(i64 tabletsTotalDiff, const TSubDomainKey& objectDomain) {
    if (!objectDomain) {
        return;
    }

    // operator[] inserts a zero-initialised counter the first time a domain is seen.
    auto& domainTotal = TabletsTotalByDomain[objectDomain];
    domainTotal += tabletsTotalDiff;
}

// Adjusts the per-domain "alive tablets" counters by `tabletsAliveDiff`.
//
//   objectDomain     - domain the tablet's object belongs to; both counters
//                      are keyed by it.
//   tabletNodeDomain - domain passed by the caller for the node the tablet is
//                      on (call sites in this change pass GetServicedDomain()).
//
// TabletsAliveByDomain is always updated. TabletsAliveInObjectDomainByDomain
// is updated only when the two domains are equal. Calls with an `objectDomain`
// that evaluates to false are ignored entirely.
void THive::UpdateTabletsAliveByDomain(i64 tabletsAliveDiff, const TSubDomainKey& objectDomain, const TSubDomainKey& tabletNodeDomain) {
    if (!objectDomain) {
        return;
    }

    auto& aliveInAnyDomain = TabletsAliveByDomain[objectDomain];
    aliveInAnyDomain += tabletsAliveDiff;

    const bool runsInOwnDomain = (objectDomain == tabletNodeDomain);
    if (runsInOwnDomain) {
        auto& aliveInOwnDomain = TabletsAliveInObjectDomainByDomain[objectDomain];
        aliveInOwnDomain += tabletsAliveDiff;
    }
}

void THive::SetCounterTabletsTotal(ui64 tabletsTotal) {
if (TabletCounters != nullptr) {
auto& counter = TabletCounters->Simple()[NHive::COUNTER_TABLETS_TOTAL];
Expand Down
5 changes: 5 additions & 0 deletions ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
ui32 DataCenters = 1;
ui32 RegisteredDataCenters = 1;
TObjectDistributions ObjectDistributions;
std::unordered_map<TSubDomainKey, ui64> TabletsTotalByDomain;
std::unordered_map<TSubDomainKey, ui64> TabletsAliveByDomain;
std::unordered_map<TSubDomainKey, ui64> TabletsAliveInObjectDomainByDomain;

bool AreWeRootHive() const { return RootHiveId == HiveId; }
bool AreWeSubDomainHive() const { return RootHiveId != HiveId; }
Expand Down Expand Up @@ -628,6 +631,8 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
TTabletCategoryInfo& GetTabletCategory(TTabletCategoryId tabletCategoryId);
void KillNode(TNodeId nodeId, const TActorId& local);
void AddToBootQueue(TTabletInfo* tablet);
void UpdateTabletsTotalByDomain(i64 tabletsTotalDiff, const TSubDomainKey& objectDomain);
void UpdateTabletsAliveByDomain(i64 tabletsAliveDiff, const TSubDomainKey& objectDomain, const TSubDomainKey& tabletNodeDomain);
void SetCounterTabletsTotal(ui64 tabletsTotal);
void UpdateCounterTabletsTotal(i64 tabletsTotalDiff);
void UpdateCounterTabletsAlive(i64 tabletsAliveDiff);
Expand Down
19 changes: 19 additions & 0 deletions ydb/core/mind/hive/leader_tablet_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ TFollowerId TLeaderTabletInfo::GetFollowerPromotableOnNode(TNodeId nodeId) const
}

void TLeaderTabletInfo::AssignDomains(const TSubDomainKey& objectDomain, const TVector<TSubDomainKey>& allowedDomains) {
const TSubDomainKey oldObjectDomain = ObjectDomain;

if (!allowedDomains.empty()) {
NodeFilter.AllowedDomains = allowedDomains;
if (!objectDomain) {
Expand All @@ -80,6 +82,22 @@ void TLeaderTabletInfo::AssignDomains(const TSubDomainKey& objectDomain, const T
for (auto& followerGroup : FollowerGroups) {
followerGroup.NodeFilter.AllowedDomains = NodeFilter.AllowedDomains;
}

const ui64 leaderAndFollowers = 1 + Followers.size();
Hive.UpdateTabletsTotalByDomain(-leaderAndFollowers, oldObjectDomain);
Hive.UpdateTabletsTotalByDomain(+leaderAndFollowers, ObjectDomain);

if (IsAlive()) {
Hive.UpdateTabletsAliveByDomain(-1, oldObjectDomain, Node->GetServicedDomain());
Hive.UpdateTabletsAliveByDomain(+1, ObjectDomain, Node->GetServicedDomain());
}

for (const auto& follower : Followers) {
if (follower.IsAlive()) {
Hive.UpdateTabletsAliveByDomain(-1, oldObjectDomain, follower.Node->GetServicedDomain());
Hive.UpdateTabletsAliveByDomain(+1, ObjectDomain, follower.Node->GetServicedDomain());
}
}
}

bool TLeaderTabletInfo::InitiateAssignTabletGroups() {
Expand Down Expand Up @@ -125,6 +143,7 @@ TFollowerTabletInfo& TLeaderTabletInfo::AddFollower(TFollowerGroup& followerGrou
follower.Id = followerId;
}
Hive.UpdateCounterTabletsTotal(+1);
Hive.UpdateTabletsTotalByDomain(+1, ObjectDomain);
return follower;
}

Expand Down
49 changes: 10 additions & 39 deletions ydb/core/mind/hive/monitoring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,37 +456,6 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase<THive> {
}

void RenderHTMLPage(IOutputStream &out) {
THashMap<TSubDomainKey, size_t> tabletsRunningInObjectDomain;
THashMap<TSubDomainKey, size_t> tabletsRunningInOtherDomains;
THashMap<TSubDomainKey, size_t> tabletsTotal;

for (const auto& [_, tablet] : Self->Tablets) {
const TSubDomainKey objectDomain = tablet.ObjectDomain;
++tabletsTotal[objectDomain];

const TNodeInfo* node = tablet.GetNode();
if (node) {
if (node->GetServicedDomain() == objectDomain) {
++tabletsRunningInObjectDomain[objectDomain];
} else {
++tabletsRunningInOtherDomains[objectDomain];
}
}

for (const auto& follower : tablet.Followers) {
++tabletsTotal[objectDomain];

const TNodeInfo* followerNode = follower.GetNode();
if (followerNode) {
if (followerNode->GetServicedDomain() == objectDomain) {
++tabletsRunningInObjectDomain[objectDomain];
} else {
++tabletsRunningInOtherDomains[objectDomain];
}
}
}
}

// out << "<script>$('.container').css('width', 'auto');</script>";
out << "<table class='table table-sortable'>";
out << "<thead>";
Expand All @@ -495,8 +464,8 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase<THive> {
out << "<th>Name</th>";
out << "<th>Hive</th>";
out << "<th>Status</th>";
out << "<th>TabletsRunningInTenantDomain</th>";
out << "<th>TabletsRunningInOtherDomains</th>";
out << "<th>TabletsAliveInTenantDomain</th>";
out << "<th>TabletsAliveInOtherDomains</th>";
out << "<th>TabletsTotal</th>";
out << "</tr>";
out << "</thead>";
Expand All @@ -521,16 +490,18 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase<THive> {
out << "<td>-</td>";
out << "<td>-</td>";
}
if (tabletsTotal[domainKey] > 0) {
out << "<td>" << std::round(tabletsRunningInObjectDomain[domainKey] * 100.0 / tabletsTotal[domainKey]) << "%"
<< " (" << tabletsRunningInObjectDomain[domainKey] << " of " << tabletsTotal[domainKey] << ")" << "</td>";
out << "<td>" << std::round(tabletsRunningInOtherDomains[domainKey] * 100.0 / tabletsTotal[domainKey]) << "%"
<< " (" << tabletsRunningInOtherDomains[domainKey] << " of " << tabletsTotal[domainKey] << ")" << "</td>";
if (Self->TabletsTotalByDomain[domainKey] > 0) {
out << "<td>" << std::round(Self->TabletsAliveInObjectDomainByDomain[domainKey] * 100.0 / Self->TabletsTotalByDomain[domainKey]) << "%"
<< " (" << Self->TabletsAliveInObjectDomainByDomain[domainKey] << " of " << Self->TabletsTotalByDomain[domainKey] << ")" << "</td>";

const ui64 tabletsAliveInOtherDomains = Self->TabletsAliveByDomain[domainKey] - Self->TabletsAliveInObjectDomainByDomain[domainKey];
out << "<td>" << std::round(tabletsAliveInOtherDomains * 100.0 / Self->TabletsTotalByDomain[domainKey]) << "%"
<< " (" << tabletsAliveInOtherDomains << " of " << Self->TabletsTotalByDomain[domainKey] << ")" << "</td>";
} else {
out << "<td>-</td>";
out << "<td>-</td>";
}
out << "<td>" << tabletsTotal[domainKey] << "</td>";
out << "<td>" << Self->TabletsTotalByDomain[domainKey] << "</td>";
out << "</tr>";
}
out << "</tbody>";
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/mind/hive/node_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ bool TNodeInfo::OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EV
TabletsRunningByType[tablet->GetTabletType()].erase(tablet);
TabletsOfObject[tablet->GetObjectId()].erase(tablet);
Hive.UpdateCounterTabletsAlive(-1);
Hive.UpdateTabletsAliveByDomain(-1, tablet->GetLeader().ObjectDomain, GetServicedDomain());
if (tablet->HasCounter() && tablet->IsLeader()) {
Hive.UpdateObjectCount(tablet->AsLeader(), *this, -1);
}
Expand All @@ -84,6 +85,7 @@ bool TNodeInfo::OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EV
TabletsRunningByType[tablet->GetTabletType()].emplace(tablet);
TabletsOfObject[tablet->GetObjectId()].emplace(tablet);
Hive.UpdateCounterTabletsAlive(+1);
Hive.UpdateTabletsAliveByDomain(+1, tablet->GetLeader().ObjectDomain, GetServicedDomain());
if (tablet->HasCounter() && tablet->IsLeader()) {
Hive.UpdateObjectCount(tablet->AsLeader(), *this, +1);
}
Expand Down
4 changes: 3 additions & 1 deletion ydb/core/mind/hive/tx__load_everything.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -706,11 +706,13 @@ class TTxLoadEverything : public TTransactionBase<THive> {

void Complete(const TActorContext& ctx) override {
BLOG_NOTICE("THive::TTxLoadEverything::Complete " << Self->DatabaseConfig.ShortDebugString());
i64 tabletsTotal = 0;
ui64 tabletsTotal = 0;
for (auto it = Self->Tablets.begin(); it != Self->Tablets.end(); ++it) {
++tabletsTotal;
Self->UpdateTabletsTotalByDomain(+1, it->second.ObjectDomain);
for (const TTabletInfo& follower : it->second.Followers) {
++tabletsTotal;
Self->UpdateTabletsTotalByDomain(+1, it->second.ObjectDomain);
if (follower.IsLeader()) {
follower.AsLeader();
} else {
Expand Down

0 comments on commit 38c4c9c

Please sign in to comment.