Skip to content

Commit

Permalink
Allow SelfHeal operation while in DEGRADED state (#8734)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexvru authored Sep 6, 2024
1 parent 0b897b9 commit 7770159
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 8 deletions.
7 changes: 7 additions & 0 deletions ydb/core/mind/bscontroller/bsc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,13 @@ void TBlobStorageController::Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev) {
const bool initial = !HostRecords;
HostRecords = std::make_shared<THostRecordMap::element_type>(ev->Get());
if (initial) {
if (auto *appData = AppData()) {
if (appData->Icb) {
EnableSelfHealWithDegraded = std::make_shared<TControlWrapper>(0, 0, 1);
appData->Icb->RegisterSharedControl(*EnableSelfHealWithDegraded,
"BlobStorageControllerControls.EnableSelfHealWithDegraded");
}
}
SelfHealId = Register(CreateSelfHealActor());
PushStaticGroupsToSelfHeal();
if (StorageConfigObtained) {
Expand Down
1 change: 1 addition & 0 deletions ydb/core/mind/bscontroller/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1516,6 +1516,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
bool AllowMultipleRealmsOccupation = true;
bool StorageConfigObtained = false;
bool Loaded = false;
std::shared_ptr<TControlWrapper> EnableSelfHealWithDegraded;

std::set<std::tuple<TGroupId, TNodeId>> GroupToNode;

Expand Down
29 changes: 21 additions & 8 deletions ydb/core/mind/bscontroller/self_heal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ namespace NKikimr::NBsController {
std::shared_ptr<TBlobStorageGroupInfo::TTopology> Topology;
TBlobStorageGroupInfo::TGroupVDisks FailedGroupDisks;
const bool IsSelfHealReasonDecommit;
const bool IgnoreDegradedGroupsChecks;
const bool DonorMode;
THashSet<TVDiskID> PendingVDisks;
THashMap<TActorId, TVDiskID> ActorToDiskMap;
Expand All @@ -51,14 +52,15 @@ namespace NKikimr::NBsController {
public:
// Constructs a reassigner for a single group.
//
// controllerId               - stored in ControllerId.
// groupId / group            - identity and content snapshot of the group; the content is moved into Group.
// vdiskToReplace             - optional VDisk to move out; stored as-is in VDiskToReplace.
// topology                   - shared group topology; moved into Topology.
// isSelfHealReasonDecommit   - stored and later forwarded to the reassign request.
// ignoreDegradedGroupsChecks - stored in IgnoreDegradedGroupsChecks.
// donorMode                  - stored in DonorMode.
TReassignerActor(TActorId controllerId, TGroupId groupId, TEvControllerUpdateSelfHealInfo::TGroupContent group,
std::optional<TVDiskID> vdiskToReplace, std::shared_ptr<TBlobStorageGroupInfo::TTopology> topology,
bool isSelfHealReasonDecommit, bool donorMode)
bool isSelfHealReasonDecommit, bool ignoreDegradedGroupsChecks, bool donorMode)
: ControllerId(controllerId)
, GroupId(groupId)
, Group(std::move(group))
, VDiskToReplace(vdiskToReplace)
, Topology(std::move(topology))
// Reads the Topology *member* (the parameter has just been moved from); this relies on
// Topology being declared before FailedGroupDisks so that it is initialized first.
, FailedGroupDisks(Topology.get())
, IsSelfHealReasonDecommit(isSelfHealReasonDecommit)
, IgnoreDegradedGroupsChecks(ignoreDegradedGroupsChecks)
, DonorMode(donorMode)
{}

Expand Down Expand Up @@ -166,6 +168,9 @@ namespace NKikimr::NBsController {
request->SetIgnoreGroupReserve(true);
request->SetSettleOnlyOnOperationalDisks(true);
request->SetIsSelfHealReasonDecommit(IsSelfHealReasonDecommit);
if (IgnoreDegradedGroupsChecks) {
request->SetIgnoreDegradedGroupsChecks(IgnoreDegradedGroupsChecks);
}
request->SetAllowUnusableDisks(true);
if (VDiskToReplace) {
ev->SelfHeal = true;
Expand Down Expand Up @@ -278,6 +283,7 @@ namespace NKikimr::NBsController {
bool AllowMultipleRealmsOccupation;
bool DonorMode;
THostRecordMap HostRecords;
std::shared_ptr<TControlWrapper> EnableSelfHealWithDegraded;

using TTopologyDescr = std::tuple<TBlobStorageGroupType::EErasureSpecies, ui32, ui32, ui32>;
THashMap<TTopologyDescr, std::shared_ptr<TBlobStorageGroupInfo::TTopology>> Topologies;
Expand All @@ -289,13 +295,15 @@ namespace NKikimr::NBsController {

public:
// Constructs the self-heal actor.
//
// tabletId                      - stored in TabletId.
// unreassignableGroups          - shared counter; moved into UnreassignableGroups.
// hostRecords                   - host record map; moved into HostRecords.
// groupLayoutSanitizerEnabled   - initial value of GroupLayoutSanitizerEnabled.
// allowMultipleRealmsOccupation - initial value of AllowMultipleRealmsOccupation.
// donorMode                     - initial value of DonorMode.
// enableSelfHealWithDegraded    - shared 0/1 control consulted when choosing a VDisk to replace.
//                                 May be null: the controller only creates it when AppData() and its
//                                 Icb are available. Because the control is dereferenced
//                                 unconditionally later on, a null pointer is replaced here with a
//                                 private, unregistered control holding the default value 0
//                                 (feature disabled), so the actor never dereferences a null pointer.
TSelfHealActor(ui64 tabletId, std::shared_ptr<std::atomic_uint64_t> unreassignableGroups, THostRecordMap hostRecords,
        bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode,
        std::shared_ptr<TControlWrapper> enableSelfHealWithDegraded)
    : TabletId(tabletId)
    , UnreassignableGroups(std::move(unreassignableGroups))
    , GroupLayoutSanitizerEnabled(groupLayoutSanitizerEnabled)
    , AllowMultipleRealmsOccupation(allowMultipleRealmsOccupation)
    , DonorMode(donorMode)
    , HostRecords(std::move(hostRecords))
    , EnableSelfHealWithDegraded(enableSelfHealWithDegraded
        ? std::move(enableSelfHealWithDegraded)
        : std::make_shared<TControlWrapper>(0, 0, 1)) // same default/min/max as the registered control
{}

void Bootstrap(const TActorId& parentId) {
Expand Down Expand Up @@ -427,9 +435,11 @@ namespace NKikimr::NBsController {

// check if it is possible to move anything out
bool isSelfHealReasonDecommit;
if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit)) {
bool ignoreDegradedGroupsChecks;
if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit,
&ignoreDegradedGroupsChecks)) {
group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content,
*v, group.Topology, isSelfHealReasonDecommit, DonorMode));
*v, group.Topology, isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode));
} else {
++counter; // this group can't be reassigned right now

Expand Down Expand Up @@ -484,7 +494,8 @@ namespace NKikimr::NBsController {
ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(GroupLayoutSanitizerOperationLog,
"Start sanitizing GroupId# " << group.GroupId << " GroupGeneration# " << group.Content.Generation);
group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content,
std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/, DonorMode));
std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/,
false /*ignoreDegradedGroupsChecks*/, DonorMode));
}
}
}
Expand Down Expand Up @@ -534,7 +545,8 @@ namespace NKikimr::NBsController {
}

std::optional<TVDiskID> FindVDiskToReplace(const TEvControllerUpdateSelfHealInfo::TGroupContent& content,
TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit) {
TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit,
bool *ignoreDegradedGroupsChecks) {
// main idea of selfhealing is step-by-step healing of bad group; we can allow healing of group with more
// than one disk missing, but we should not move next faulty disk until previous one is replicated, at least
// partially (meaning only phantoms left)
Expand All @@ -553,7 +565,7 @@ namespace NKikimr::NBsController {
}
[[fallthrough]];
case NKikimrBlobStorage::EVDiskStatus::INIT_PENDING:
return std::nullopt; // don't touch group with replicating disks
return std::nullopt; // don't touch group with replicating or starting disks

default:
break;
Expand All @@ -579,6 +591,7 @@ namespace NKikimr::NBsController {
continue; // this group will become degraded when applying self-heal logic, skip disk
}
*isSelfHealReasonDecommit = vdisk.IsSelfHealReasonDecommit;
*ignoreDegradedGroupsChecks = checker.IsDegraded(failedByReadiness) && *EnableSelfHealWithDegraded;
return vdiskId;
}
}
Expand Down Expand Up @@ -886,7 +899,7 @@ namespace NKikimr::NBsController {
// Creates the self-heal actor for this controller and returns it as a raw IActor pointer
// (the visible caller passes the result straight to Register()).
// Aborts if HostRecords has not been set yet.
// NOTE(review): EnableSelfHealWithDegraded is only assigned when AppData() and its Icb are
// non-null, so the pointer handed over here can be null — confirm the actor tolerates that.
IActor *TBlobStorageController::CreateSelfHealActor() {
Y_ABORT_UNLESS(HostRecords);
return new TSelfHealActor(TabletID(), SelfHealUnreassignableGroups, HostRecords, GroupLayoutSanitizerEnabled,
AllowMultipleRealmsOccupation, DonorMode);
AllowMultipleRealmsOccupation, DonorMode, EnableSelfHealWithDegraded);
}

void TBlobStorageController::InitializeSelfHealState() {
Expand Down
9 changes: 9 additions & 0 deletions ydb/core/protos/config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1343,6 +1343,14 @@ message TImmediateControlsConfig {
DefaultValue: 200 }];
}

// Immediate controls for the BlobStorage controller.
message TBlobStorageControllerControls {
// 0/1 switch, 0 (off) by default. The controller registers it under the name
// "BlobStorageControllerControls.EnableSelfHealWithDegraded"; when it is non-zero and the
// group being healed is already degraded, self-heal asks the reassign request to ignore
// degraded-group checks.
optional uint64 EnableSelfHealWithDegraded = 1 [(ControlOptions) = {
Description: "Should SelfHeal automatically process groups that are in DEGRADED status (one step from nonworking)",
MinValue: 0,
MaxValue: 1,
DefaultValue: 0 }];
}

optional TDataShardControls DataShardControls = 1;
optional TTxLimitControls TxLimitControls = 2;
optional TCoordinatorControls CoordinatorControls = 3;
Expand All @@ -1353,6 +1361,7 @@ message TImmediateControlsConfig {
optional TTabletControls TabletControls = 8;
optional TDSProxyControls DSProxyControls = 9;
optional TPDiskControls PDiskControls = 10;
optional TBlobStorageControllerControls BlobStorageControllerControls = 11;
};

message TMeteringConfig {
Expand Down

0 comments on commit 7770159

Please sign in to comment.