diff --git a/ydb/core/viewer/json_healthcheck.h b/ydb/core/viewer/json_healthcheck.h index 256a9da018a3..1d6e4485c9b0 100644 --- a/ydb/core/viewer/json_healthcheck.h +++ b/ydb/core/viewer/json_healthcheck.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "viewer.h" #include @@ -10,6 +11,7 @@ #include #include #include +#include "json_pipe_req.h" #include "healthcheck_record.h" #include @@ -23,7 +25,8 @@ enum HealthCheckResponseFormat { PROMETHEUS }; -class TJsonHealthCheck : public TActorBootstrapped { +class TJsonHealthCheck : public TViewerPipeClient { + using TBase = TViewerPipeClient; IViewer* Viewer; static const bool WithRetry = false; NMon::TEvHttpInfo::TPtr Event; @@ -31,6 +34,10 @@ class TJsonHealthCheck : public TActorBootstrapped { ui32 Timeout = 0; HealthCheckResponseFormat Format; TString Database; + bool Cache = true; + std::optional Result; + std::optional SubscribedNodeId; + Ydb::Monitoring::StatusFlag::Status MinStatus = Ydb::Monitoring::StatusFlag::UNSPECIFIED; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { @@ -42,8 +49,26 @@ class TJsonHealthCheck : public TActorBootstrapped { , Event(ev) {} - void Bootstrap(const TActorContext& ctx) { + THolder MakeSelfCheckRequrest() { const auto& params(Event->Get()->Request.GetParams()); + THolder request = MakeHolder(); + request->Database = Database; + request->Request.set_return_verbose_status(FromStringWithDefault(params.Get("verbose"), false)); + request->Request.set_maximum_level(FromStringWithDefault(params.Get("max_level"), 0)); + request->Request.set_merge_records(FromStringWithDefault(params.Get("merge_records"), false)); + request->Request.set_minimum_status(MinStatus); + SetDuration(TDuration::MilliSeconds(Timeout), *request->Request.mutable_operation_params()->mutable_operation_timeout()); + return request; + } + + void SendHealthCheckRequest() { + auto request = MakeSelfCheckRequrest(); + Send(NHealthCheck::MakeHealthCheckID(), 
request.Release()); + } + + void Bootstrap() { + const auto& params(Event->Get()->Request.GetParams()); + InitConfig(params); Format = HealthCheckResponseFormat::JSON; if (params.Has("format")) { @@ -68,32 +93,37 @@ class TJsonHealthCheck : public TActorBootstrapped { JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); } + Database = params.Get("tenant"); + Cache = FromStringWithDefault(params.Get("cache"), true); Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - THolder request = MakeHolder(); - request->Database = Database = params.Get("tenant"); - request->Request.set_return_verbose_status(FromStringWithDefault(params.Get("verbose"), false)); - request->Request.set_maximum_level(FromStringWithDefault(params.Get("max_level"), 0)); - request->Request.set_merge_records(FromStringWithDefault(params.Get("merge_records"), false)); - SetDuration(TDuration::MilliSeconds(Timeout), *request->Request.mutable_operation_params()->mutable_operation_timeout()); - if (params.Has("min_status")) { - Ydb::Monitoring::StatusFlag::Status minStatus; - if (Ydb::Monitoring::StatusFlag_Status_Parse(params.Get("min_status"), &minStatus)) { - request->Request.set_minimum_status(minStatus); - } else { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } + + if (params.Get("min_status") && !Ydb::Monitoring::StatusFlag_Status_Parse(params.Get("min_status"), &MinStatus)) { + Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + return PassAway(); + } + if (Cache && Database) { + RequestStateStorageMetadataCacheEndpointsLookup(Database); + } else { + SendHealthCheckRequest(); } - Send(NHealthCheck::MakeHealthCheckID(), request.Release()); Timeout += Timeout * 20 / 100; // we prefer to 
wait for more (+20%) verbose timeout status from HC - ctx.Schedule(TDuration::Seconds(Timeout), new TEvents::TEvWakeup()); - Become(&TThis::StateRequestedInfo); + Become(&TThis::StateRequestedInfo, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); + } + + void PassAway() override { + if (SubscribedNodeId.has_value()) { + Send(TActivationContext::InterconnectProxy(SubscribedNodeId.value()), new TEvents::TEvUnsubscribe()); + } + TBase::PassAway(); } STFUNC(StateRequestedInfo) { switch (ev->GetTypeRewrite()) { - HFunc(NHealthCheck::TEvSelfCheckResult, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + hFunc(NHealthCheck::TEvSelfCheckResult, Handle); + cFunc(TEvents::TSystem::Wakeup, HandleTimeout); + hFunc(NHealthCheck::TEvSelfCheckResultProto, Handle); + cFunc(TEvents::TSystem::Undelivered, SendHealthCheckRequest); + hFunc(TEvStateStorage::TEvBoardInfo, Handle); } } @@ -101,10 +131,10 @@ class TJsonHealthCheck : public TActorBootstrapped { return issueLog.count() == 0 ? 
1 : issueLog.count(); } - THolder> GetRecordCounters(NHealthCheck::TEvSelfCheckResult::TPtr& ev) { + THolder> GetRecordCounters() { const auto *descriptor = Ydb::Monitoring::StatusFlag_Status_descriptor(); THashMap recordCounters; - for (auto& log : ev->Get()->Result.issue_log()) { + for (auto& log : Result->issue_log()) { TMetricRecord record { .Database = log.location().database().name(), .Message = log.message(), @@ -123,15 +153,14 @@ class TJsonHealthCheck : public TActorBootstrapped { return MakeHolder>(recordCounters); } - void HandleJSON(NHealthCheck::TEvSelfCheckResult::TPtr& ev, const TActorContext &ctx) { + void HandleJSON() { TStringStream json; - TProtoToJson::ProtoToJson(json, ev->Get()->Result, JsonSettings); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); + TProtoToJson::ProtoToJson(json, *Result, JsonSettings); + Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); } - void HandlePrometheus(NHealthCheck::TEvSelfCheckResult::TPtr& ev, const TActorContext &ctx) { - auto recordCounters = GetRecordCounters(ev); + void HandlePrometheus() { + auto recordCounters = GetRecordCounters(); TStringStream ss; IMetricEncoderPtr encoder = EncoderPrometheus(&ss); @@ -159,7 +188,7 @@ class TJsonHealthCheck : public TActorBootstrapped { } } const auto *descriptor = Ydb::Monitoring::SelfCheck_Result_descriptor(); - auto result = descriptor->FindValueByNumber(ev->Get()->Result.self_check_result())->name(); + auto result = descriptor->FindValueByNumber(Result->self_check_result())->name(); e->OnMetricBegin(EMetricType::IGAUGE); { e->OnLabelsBegin(); @@ -175,21 +204,50 @@ class TJsonHealthCheck : public TActorBootstrapped { e->OnMetricEnd(); e->OnStreamEnd(); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKTEXT(Event->Get()) + ss.Str(), 0, 
NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); + Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKTEXT(Event->Get()) + ss.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); } - void Handle(NHealthCheck::TEvSelfCheckResult::TPtr& ev, const TActorContext &ctx) { - if (Format == HealthCheckResponseFormat::JSON) { - HandleJSON(ev, ctx); + void ReplyAndPassAway() { + if (Result) { + if (Format == HealthCheckResponseFormat::JSON) { + HandleJSON(); + } else { + HandlePrometheus(); + } + } + PassAway(); + } + + void Handle(NHealthCheck::TEvSelfCheckResult::TPtr& ev) { + Result = std::move(ev->Get()->Result); + ReplyAndPassAway(); + } + + void Handle(TEvents::TEvUndelivered::TPtr&) { + SendHealthCheckRequest(); + } + + void Handle(NHealthCheck::TEvSelfCheckResultProto::TPtr& ev) { + Result = std::move(ev->Get()->Record); + NHealthCheck::RemoveUnrequestedEntries(*Result, MakeSelfCheckRequrest()->Request); // the holder temporary lives until the call returns and then frees the request; calling Release() here leaked it + ReplyAndPassAway(); + } + + void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { + auto activeNode = TDatabaseMetadataCache::PickActiveNode(ev->Get()->InfoEntries); + if (activeNode != 0) { + SubscribedNodeId = activeNode; + std::optional cache = MakeDatabaseMetadataCacheId(activeNode); + auto request = MakeHolder(); // sent with delivery tracking and session subscription so that an unreachable cache node produces Undelivered, which falls back to the local health check + Send(*cache, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession); } else { - HandlePrometheus(ev, ctx); + SendHealthCheckRequest(); } } - void HandleTimeout(const TActorContext &ctx) { + void HandleTimeout() { Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); + PassAway(); } }; @@ -224,6 +282,11 @@ struct TJsonRequestParameters { description: path to database required: false type: string + - name: cache + in: query + description: use cache + required: false + type: boolean - name: verbose in: query description: return verbose status diff --git a/ydb/core/viewer/json_pipe_req.h b/ydb/core/viewer/json_pipe_req.h index ed39e9263080..e28f30c29a7c 100644 
--- a/ydb/core/viewer/json_pipe_req.h +++ b/ydb/core/viewer/json_pipe_req.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -248,6 +249,16 @@ class TViewerPipeClient : public TActorBootstrapped { ++Requests; } + void RequestStateStorageMetadataCacheEndpointsLookup(const TString& path) { + if (!AppData()->DomainsInfo->Domain) { + return; + } + TBase::RegisterWithSameMailbox(CreateBoardLookupActor(MakeDatabaseMetadataCacheBoardPath(path), + TBase::SelfId(), + EBoardLookupMode::Second)); + ++Requests; + } + std::vector GetNodesFromBoardReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) { std::vector databaseNodes; if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { diff --git a/ydb/core/viewer/viewer.cpp b/ydb/core/viewer/viewer.cpp index 690de4928a14..dc92bb4de208 100644 --- a/ydb/core/viewer/viewer.cpp +++ b/ydb/core/viewer/viewer.cpp @@ -989,6 +989,28 @@ NKikimrViewer::EFlag GetBSGroupOverallFlag( return GetBSGroupOverallState(info, vDisksIndex, pDisksIndex).Overall; } +// Maps a health check status flag to the viewer flag: BLUE is reported as Green, GREY, UNSPECIFIED and the sentinel values as Grey. +NKikimrViewer::EFlag GetViewerFlag(Ydb::Monitoring::StatusFlag::Status flag) { + switch (flag) { + case Ydb::Monitoring::StatusFlag::GREY: + case Ydb::Monitoring::StatusFlag::UNSPECIFIED: + case Ydb::Monitoring::StatusFlag_Status_StatusFlag_Status_INT_MIN_SENTINEL_DO_NOT_USE_: + case Ydb::Monitoring::StatusFlag_Status_StatusFlag_Status_INT_MAX_SENTINEL_DO_NOT_USE_: + return NKikimrViewer::EFlag::Grey; + case Ydb::Monitoring::StatusFlag::GREEN: + return NKikimrViewer::EFlag::Green; + case Ydb::Monitoring::StatusFlag::BLUE: + return NKikimrViewer::EFlag::Green; + case Ydb::Monitoring::StatusFlag::YELLOW: + return NKikimrViewer::EFlag::Yellow; + case Ydb::Monitoring::StatusFlag::ORANGE: + return NKikimrViewer::EFlag::Orange; + case Ydb::Monitoring::StatusFlag::RED: + return NKikimrViewer::EFlag::Red; + } + return NKikimrViewer::EFlag::Grey; // a value outside the listed enumerators would otherwise run off the end of the function, which is undefined behaviour +} + NKikimrWhiteboard::EFlag GetWhiteboardFlag(NKikimrViewer::EFlag flag) { switch (flag) { case NKikimrViewer::EFlag::Grey: diff --git 
a/ydb/core/viewer/viewer.h b/ydb/core/viewer/viewer.h index 876f8400994a..ce0c13bee47c 100644 --- a/ydb/core/viewer/viewer.h +++ b/ydb/core/viewer/viewer.h @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace NKikimr { @@ -272,6 +273,8 @@ NKikimrViewer::EFlag GetFlagFromUsage(double usage); NKikimrWhiteboard::EFlag GetWhiteboardFlag(NKikimrViewer::EFlag flag); NKikimrViewer::EFlag GetViewerFlag(NKikimrWhiteboard::EFlag flag); +NKikimrViewer::EFlag GetViewerFlag(Ydb::Monitoring::StatusFlag::Status flag); + } // NViewer } // NKikimr