diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index 341325b3a313..1f1e595d0679 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -189,21 +189,34 @@ class TSelfCheckRequest : public TActorBootstrapped { int Count = 1; TStackVec Identifiers; - TNodeTabletStateCount(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) { - Type = info.tablettype(); - Leader = info.followerid() == 0; + static ETabletState GetState(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) { if (info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_STOPPED) { - State = ETabletState::Stopped; - } else if (!settings.IsHiveSynchronizationPeriod - && info.volatilestate() != NKikimrHive::TABLET_VOLATILE_STATE_RUNNING - && TInstant::MilliSeconds(info.lastalivetimestamp()) < settings.AliveBarrier - && info.tabletbootmode() == NKikimrHive::TABLET_BOOT_MODE_DEFAULT) { - State = ETabletState::Dead; - } else if (info.restartsperperiod() >= settings.MaxRestartsPerPeriod) { - State = ETabletState::RestartsTooOften; - } else { - State = ETabletState::Good; + return ETabletState::Stopped; + } + ETabletState state = (info.restartsperperiod() >= settings.MaxRestartsPerPeriod) ? ETabletState::RestartsTooOften : ETabletState::Good; + if (info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_RUNNING) { + return state; + } + if (info.tabletbootmode() != NKikimrHive::TABLET_BOOT_MODE_DEFAULT) { + return state; + } + if (info.lastalivetimestamp() != 0 && TInstant::MilliSeconds(info.lastalivetimestamp()) < settings.AliveBarrier) { + // Tablet is not alive for a long time + // We should report it as dead unless it's just waiting to be created + if (info.generation() == 0 && info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_BOOTING && !info.inwaitqueue()) { + return state; + } + return ETabletState::Dead; } + return state; + + } + + TNodeTabletStateCount(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) + : Type(info.tablettype()) + , State(GetState(info, settings)) + , Leader(info.followerid() == 0) + { } bool operator ==(const TNodeTabletStateCount& o) const { @@ -1983,6 +1996,8 @@ class TSelfCheckRequest : public TActorBootstrapped { } } + // do not propagate RED status to vdisk - so that vdisk is not considered down when computing group status + context.OverallStatus = MinStatus(context.OverallStatus, Ydb::Monitoring::StatusFlag::ORANGE); storagePDiskStatus.set_overall(context.GetOverallStatus()); } diff --git a/ydb/core/health_check/health_check_ut.cpp b/ydb/core/health_check/health_check_ut.cpp index c17f58211696..eefa30345e57 100644 --- a/ydb/core/health_check/health_check_ut.cpp +++ b/ydb/core/health_check/health_check_ut.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -68,7 +69,8 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { struct TTestVSlotInfo { std::optional Status; - ui32 Generation; + ui32 Generation = DEFAULT_GROUP_GENERATION; + NKikimrBlobStorage::EDriveStatus PDiskStatus = NKikimrBlobStorage::ACTIVE; TTestVSlotInfo(std::optional status = NKikimrBlobStorage::READY, ui32 generation = DEFAULT_GROUP_GENERATION) @@ -77,7 +79,11 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { { } - TTestVSlotInfo(NKikimrBlobStorage::EVDiskStatus status) : Status(status), Generation(DEFAULT_GROUP_GENERATION) {} + TTestVSlotInfo(NKikimrBlobStorage::EVDiskStatus status, NKikimrBlobStorage::EDriveStatus pDiskStatus = NKikimrBlobStorage::ACTIVE) + : Status(status) + , PDiskStatus(pDiskStatus) + { + } }; using TVDisks = TVector; @@ -222,18 +228,20 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { entry->mutable_info()->set_name(STORAGE_POOL_NAME); } - void AddPDisksToSysViewResponse(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr* ev, size_t count, double occupancy) { + void AddPDisksToSysViewResponse(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr* ev, const TVDisks& vslots, double occupancy) { auto& record = (*ev)->Get()->Record; auto entrySample = record.entries(0); record.clear_entries(); auto pdiskId = PDISK_START_ID; const size_t totalSize = 3'200'000'000'000ull; - for (size_t i = 0; i < count; ++i) { + const auto *descriptor = NKikimrBlobStorage::EDriveStatus_descriptor(); + for (const auto& vslot : vslots) { auto* entry = record.add_entries(); entry->CopyFrom(entrySample); entry->mutable_key()->set_pdiskid(pdiskId); entry->mutable_info()->set_totalsize(totalSize); entry->mutable_info()->set_availablesize((1 - occupancy) * totalSize); + entry->mutable_info()->set_statusv2(descriptor->FindValueByNumber(vslot.PDiskStatus)->name()); ++pdiskId; } } @@ -482,7 +490,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case NSysView::TEvSysView::EvGetPDisksResponse: { auto* x = reinterpret_cast(&ev); - AddPDisksToSysViewResponse(x, vdisks.size(), occupancy); + AddPDisksToSysViewResponse(x, vdisks, occupancy); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -710,6 +718,14 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 1); } + Y_UNIT_TEST(YellowIssueReadyVDisksOnFaultyPDisks) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, {NKikimrBlobStorage::READY, NKikimrBlobStorage::FAULTY}}); + Cerr << result.ShortDebugString() << Endl; + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1); + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::ORANGE, 0); + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 0); + } + /* HC currently infers group status on its own, so it's never unknown Y_UNIT_TEST(RedGroupIssueWhenUnknownGroupStatus) { auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::UNKNOWN, {}); @@ -1818,123 +1834,128 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].id(), "static"); } - void HiveSyncTest(bool syncPeriod) { + Y_UNIT_TEST(ShardsLimit999) { + ShardsQuotaTest(999, 1000, 1, Ydb::Monitoring::StatusFlag::RED); + } + + Y_UNIT_TEST(ShardsLimit995) { + ShardsQuotaTest(995, 1000, 1, Ydb::Monitoring::StatusFlag::ORANGE); + } + + Y_UNIT_TEST(ShardsLimit905) { + ShardsQuotaTest(905, 1000, 1, Ydb::Monitoring::StatusFlag::YELLOW); + } + + Y_UNIT_TEST(ShardsLimit800) { + ShardsQuotaTest(805, 1000, 0, Ydb::Monitoring::StatusFlag::GREEN); + } + + Y_UNIT_TEST(ShardsNoLimit) { + ShardsQuotaTest(105, 0, 0, Ydb::Monitoring::StatusFlag::GREEN); + } + + bool HasDeadTabletIssue(const Ydb::Monitoring::SelfCheckResult& result) { + for (const auto& issue_log : result.issue_log()) { + if (issue_log.level() == 4 && issue_log.type() == "TABLET") { + return true; + } + } + return false; + } + + Y_UNIT_TEST(TestTabletIsDead) { TPortManager tp; ui16 port = tp.GetPort(2134); ui16 grpcPort = tp.GetPort(2135); auto settings = TServerSettings(port) - .SetNodeCount(1) + .SetNodeCount(2) .SetDynamicNodeCount(1) .SetUseRealThreads(false) .SetDomainName("Root"); TServer server(settings); server.EnableGRpc(grpcPort); + TClient client(settings); - TTestActorRuntime& runtime = *server.GetRuntime(); - ui32 dynNodeId = runtime.GetNodeId(1); + TTestActorRuntime* runtime = server.GetRuntime(); + TActorId sender = runtime->AllocateEdgeActor(); - auto observerFunc = [&](TAutoPtr& ev) { - switch (ev->GetTypeRewrite()) { - case TEvHive::EvResponseHiveInfo: { - auto *x = reinterpret_cast(&ev); - auto& record = (*x)->Get()->Record; - record.SetStartTimeTimestamp(0); - if (syncPeriod) { - record.SetResponseTimestamp(NHealthCheck::TSelfCheckRequest::HIVE_SYNCHRONIZATION_PERIOD_MS / 2); - } else { - record.SetResponseTimestamp(NHealthCheck::TSelfCheckRequest::HIVE_SYNCHRONIZATION_PERIOD_MS * 2); - } - auto *tablet = record.MutableTablets()->Add(); - tablet->SetTabletID(1); - tablet->SetNodeID(dynNodeId); - tablet->SetTabletType(NKikimrTabletBase::TTabletTypes::DataShard); - tablet->SetVolatileState(NKikimrHive::TABLET_VOLATILE_STATE_BOOTING); - tablet->MutableObjectDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId); - tablet->MutableObjectDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId); - break; - } - case TEvHive::EvResponseHiveNodeStats: { - auto *x = reinterpret_cast(&ev); - auto &record = (*x)->Get()->Record; - auto *nodeStats = record.MutableNodeStats()->Add(); - nodeStats->SetNodeId(dynNodeId); - nodeStats->MutableNodeDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId); - nodeStats->MutableNodeDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId); - break; - } - case NConsole::TEvConsole::EvGetTenantStatusResponse: { - auto *x = reinterpret_cast(&ev); - ChangeGetTenantStatusResponse(x, "/Root/database"); - break; - } - case TEvTxProxySchemeCache::EvNavigateKeySetResult: { - auto *x = reinterpret_cast(&ev); - TSchemeCacheNavigate::TEntry& entry((*x)->Get()->Request->ResultSet.front()); - entry.Status = TSchemeCacheNavigate::EStatus::Ok; - entry.Kind = TSchemeCacheNavigate::EKind::KindExtSubdomain; - entry.Path = {"Root", "database"}; - entry.DomainInfo = MakeIntrusive(SUBDOMAIN_KEY, SUBDOMAIN_KEY); + server.SetupDynamicLocalService(2, "Root"); + server.StartPQTablets(1); + server.DestroyDynamicLocalService(2); + runtime->AdvanceCurrentTime(TDuration::Minutes(5)); - break; - } - } + TAutoPtr handle; + runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0)); + auto result = runtime->GrabEdgeEvent(handle)->Result; + Cerr << result.ShortDebugString(); - return TTestActorRuntime::EEventAction::PROCESS; - }; - runtime.SetObserverFunc(observerFunc); + UNIT_ASSERT(HasDeadTabletIssue(result)); + } - TActorId sender = runtime.AllocateEdgeActor(); - TAutoPtr handle; + Y_UNIT_TEST(TestBootingTabletIsNotDead) { + TPortManager tp; + ui16 port = tp.GetPort(2134); + ui16 grpcPort = tp.GetPort(2135); + auto settings = TServerSettings(port) + .SetNodeCount(2) + .SetDynamicNodeCount(1) + .SetUseRealThreads(false) + .SetDomainName("Root"); + TServer server(settings); + server.EnableGRpc(grpcPort); - auto *request = new NHealthCheck::TEvSelfCheckRequest; - request->Request.set_return_verbose_status(true); - request->Database = "/Root/database"; - runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0)); - const auto result = runtime.GrabEdgeEvent(handle)->Result; + TClient client(settings); - Cerr << result.ShortDebugString() << Endl; + TTestActorRuntime* runtime = server.GetRuntime(); + TActorId sender = runtime->AllocateEdgeActor(); - UNIT_ASSERT_VALUES_EQUAL(result.database_status_size(), 1); + auto blockBoot = runtime->AddObserver([](auto&& ev) { ev.Reset(); }); - bool deadTabletIssueFoundInResult = false; - for (const auto &issue_log : result.issue_log()) { - if (issue_log.level() == 4 && issue_log.type() == "TABLET") { - UNIT_ASSERT_VALUES_EQUAL(issue_log.location().compute().tablet().id().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(issue_log.location().compute().tablet().type(), "DataShard"); - deadTabletIssueFoundInResult = true; - } - } + server.SetupDynamicLocalService(2, "Root"); + server.StartPQTablets(1, false); + runtime->AdvanceCurrentTime(TDuration::Minutes(5)); - UNIT_ASSERT_VALUES_EQUAL(syncPeriod, !deadTabletIssueFoundInResult); - } + TAutoPtr handle; + runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0)); + auto result = runtime->GrabEdgeEvent(handle)->Result; + Cerr << result.ShortDebugString(); - Y_UNIT_TEST(HiveSyncPeriodIgnoresTabletsState) { - HiveSyncTest(true); + UNIT_ASSERT(!HasDeadTabletIssue(result)); } - Y_UNIT_TEST(AfterHiveSyncPeriodReportsTabletsState) { - HiveSyncTest(false); - } + Y_UNIT_TEST(TestReBootingTabletIsDead) { + TPortManager tp; + ui16 port = tp.GetPort(2134); + ui16 grpcPort = tp.GetPort(2135); + auto settings = TServerSettings(port) + .SetNodeCount(2) + .SetDynamicNodeCount(2) + .SetUseRealThreads(false) + .SetDomainName("Root"); + TServer server(settings); + server.EnableGRpc(grpcPort); - Y_UNIT_TEST(ShardsLimit999) { - ShardsQuotaTest(999, 1000, 1, Ydb::Monitoring::StatusFlag::RED); - } + TClient client(settings); - Y_UNIT_TEST(ShardsLimit995) { - ShardsQuotaTest(995, 1000, 1, Ydb::Monitoring::StatusFlag::ORANGE); - } + TTestActorRuntime* runtime = server.GetRuntime(); + runtime->SetLogPriority(NKikimrServices::HIVE, NActors::NLog::PRI_TRACE); + TActorId sender = runtime->AllocateEdgeActor(); - Y_UNIT_TEST(ShardsLimit905) { - ShardsQuotaTest(905, 1000, 1, Ydb::Monitoring::StatusFlag::YELLOW); - } - Y_UNIT_TEST(ShardsLimit800) { - ShardsQuotaTest(805, 1000, 0, Ydb::Monitoring::StatusFlag::GREEN); - } + server.SetupDynamicLocalService(2, "Root"); + server.StartPQTablets(1, true); + server.SetupDynamicLocalService(3, "Root"); + auto blockBoot = runtime->AddObserver([](auto&& ev) { ev.Reset(); }); + server.DestroyDynamicLocalService(2); + runtime->AdvanceCurrentTime(TDuration::Minutes(5)); - Y_UNIT_TEST(ShardsNoLimit) { - ShardsQuotaTest(105, 0, 0, Ydb::Monitoring::StatusFlag::GREEN); + TAutoPtr handle; + runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0)); + auto result = runtime->GrabEdgeEvent(handle)->Result; + Cerr << result.ShortDebugString(); + + UNIT_ASSERT(HasDeadTabletIssue(result)); } } } diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp index b421e11757cc..cafce3a1b80f 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -236,6 +236,7 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec if (tablet == nullptr) { continue; } + tablet->InWaitQueue = false; if (tablet->IsAlive()) { BLOG_D("tablet " << record.TabletId << " already alive, skipping"); continue; @@ -261,6 +262,7 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec UpdateTabletFollowersNumber(leader, db, sideEffects); } BootQueue.AddToWaitQueue(record); // waiting for new node + tablet->InWaitQueue = true; continue; } } @@ -1854,6 +1856,9 @@ void THive::FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabl if (req.GetReturnMetrics()) { tabletInfo.MutableMetrics()->CopyFrom(info->GetResourceValues()); } + if (info->InWaitQueue) { + tabletInfo.SetInWaitQueue(true); + } if (req.GetReturnChannelHistory()) { for (const auto& channel : info->TabletStorageInfo->Channels) { auto& tabletChannel = *tabletInfo.AddTabletChannels(); diff --git a/ydb/core/mind/hive/tablet_info.h b/ydb/core/mind/hive/tablet_info.h index 35920dd1748c..433b5e988bd9 100644 --- a/ydb/core/mind/hive/tablet_info.h +++ b/ydb/core/mind/hive/tablet_info.h @@ -162,6 +162,7 @@ struct TTabletInfo { TInstant PostponedStart; EBalancerPolicy BalancerPolicy; TNodeId FailedNodeId = 0; // last time we tried to start the tablet, we failed on this node + bool InWaitQueue = false; TTabletInfo(ETabletRole role, THive& hive); TTabletInfo(const TTabletInfo&) = delete; diff --git a/ydb/core/protos/hive.proto b/ydb/core/protos/hive.proto index 64e4e7c216f2..578809b86ffe 100644 --- a/ydb/core/protos/hive.proto +++ b/ydb/core/protos/hive.proto @@ -494,6 +494,7 @@ message TTabletInfo { optional uint32 RestartsPerPeriod = 22; optional uint64 LastAliveTimestamp = 23; optional EBalancerPolicy BalancerPolicy = 24; + optional bool InWaitQueue = 25; } message TEvSeizeTabletsReply { diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index 30962870e8e6..fbc0ed46e7db 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -472,7 +472,7 @@ namespace Tests { app.AddDomain(domain.Release()); } - TVector TServer::StartPQTablets(ui32 pqTabletsN) { + TVector TServer::StartPQTablets(ui32 pqTabletsN, bool wait) { auto getChannelBind = [](const TString& storagePool) { TChannelBind bind; bind.SetStoragePoolName(storagePool); @@ -507,7 +507,7 @@ namespace Tests { UNIT_ASSERT_EQUAL_C(createTabletReply->Record.GetOwner(), tabletId, createTabletReply->Record.GetOwner() << " != " << tabletId); ui64 id = createTabletReply->Record.GetTabletID(); - while (true) { + while (wait) { auto tabletCreationResult = Runtime->GrabEdgeEventRethrow(handle); UNIT_ASSERT(tabletCreationResult); diff --git a/ydb/core/testlib/test_client.h b/ydb/core/testlib/test_client.h index 133f0fe77d39..68b878f4de04 100644 --- a/ydb/core/testlib/test_client.h +++ b/ydb/core/testlib/test_client.h @@ -305,7 +305,7 @@ namespace Tests { } } void StartDummyTablets(); - TVector StartPQTablets(ui32 pqTabletsN); + TVector StartPQTablets(ui32 pqTabletsN, bool wait = true); TTestActorRuntime* GetRuntime() const; const TServerSettings& GetSettings() const; const NScheme::TTypeRegistry* GetTypeRegistry();