Skip to content

Commit

Permalink
Consider free slots count in BSC during group allocation (ydb-platfor…
Browse files Browse the repository at this point in the history
  • Loading branch information
va-kuznecov authored Oct 29, 2024
1 parent 8acfed4 commit 012816b
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 31 deletions.
11 changes: 7 additions & 4 deletions ydb/core/mind/bscontroller/group_layout_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,12 +201,15 @@ namespace NKikimr::NBsController {
const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNumber);
const ui32 domainIdx = Topology.GetFailDomainOrderNumber(vdisk);

const auto& disksPerRealm = NumDisksPerRealm[vdisk.FailRealm][pos.Realm];
const auto& disksPerDomain = NumDisksPerDomain[domainIdx][pos.Domain];

return {
.RealmInterlace = NumDisksPerRealmTotal[pos.Realm] - NumDisksPerRealm[vdisk.FailRealm][pos.Realm],
.DomainInterlace = NumDisksPerDomainTotal[pos.Domain] - NumDisksPerDomain[domainIdx][pos.Domain],
.RealmInterlace = NumDisksPerRealmTotal[pos.Realm] - disksPerRealm,
.DomainInterlace = NumDisksPerDomainTotal[pos.Domain] - disksPerDomain,
.RealmGroupScatter = NumDisks - NumDisksPerRealmGroup[pos.RealmGroup],
.RealmScatter = NumDisksInRealm[vdisk.FailRealm] - NumDisksPerRealm[vdisk.FailRealm][pos.Realm],
.DomainScatter = NumDisksInDomain[domainIdx] - NumDisksPerDomain[domainIdx][pos.Domain],
.RealmScatter = NumDisksInRealm[vdisk.FailRealm] - disksPerRealm,
.DomainScatter = NumDisksInDomain[domainIdx] - disksPerDomain,
};
}

Expand Down
49 changes: 27 additions & 22 deletions ydb/core/mind/bscontroller/group_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,13 @@ namespace NKikimr::NBsController {
}
}

ui32 GetPickerScore() const {
return NumSlots;
// Number of slots still unoccupied on this disk: MaxSlots minus NumSlots.
// The result is signed on purpose: it can be negative when NumSlots exceeds MaxSlots.
i32 FreeSlots() const {
    const i32 capacity = i32(MaxSlots);
    return capacity - NumSlots;
}

// Picker score is the occupied fraction of the disk: NumSlots divided by MaxSlots,
// computed in floating point so disks with different MaxSlots are comparable.
// NOTE(review): assumes MaxSlots is non-zero here; a zero value would produce inf/NaN -- confirm against callers.
double GetPickerScore() const {
    const double occupied = static_cast<double>(NumSlots);
    return occupied / MaxSlots;
}
};

Expand All @@ -65,7 +70,7 @@ namespace NKikimr::NBsController {

using TPDomainCandidatesRange = std::pair<std::vector<ui32>::const_iterator, std::vector<ui32>::const_iterator>;
using TPDiskCandidatesRange = std::pair<std::vector<TPDiskInfo*>::const_iterator, std::vector<TPDiskInfo*>::const_iterator>;

struct TDiskManager {
TImpl& Self;
const TBlobStorageGroupInfo::TTopology Topology;
Expand Down Expand Up @@ -126,7 +131,7 @@ namespace NKikimr::NBsController {

return res;
}

TGroupConstraints ProcessGroupConstraints(const TGroupConstraintsDefinition& groupConstraints) {
TGroupConstraints res(Topology.GetTotalVDisksNum());
Traverse(groupConstraints, [&](TVDiskIdShort vdisk, TTargetDiskConstraints diskConstraints) {
Expand Down Expand Up @@ -159,7 +164,7 @@ namespace NKikimr::NBsController {
return true;
}

TPDiskByPosition SetupMatchingDisks(ui32 maxScore) {
TPDiskByPosition SetupMatchingDisks(double maxScore) {
TPDiskByPosition res;
res.reserve(Self.PDiskByPosition.size());

Expand Down Expand Up @@ -245,8 +250,8 @@ namespace NKikimr::NBsController {
}

bool DiskIsBetter(const TPDiskInfo& pretender, const TPDiskInfo& king) const {
if (pretender.NumSlots != king.NumSlots) {
return pretender.NumSlots < king.NumSlots;
if (pretender.FreeSlots() != king.FreeSlots()) {
return pretender.FreeSlots() > king.FreeSlots();
} else if (GivesLocalityBoost(pretender, king) || BetterQuotaMatch(pretender, king)) {
return true;
} else {
Expand Down Expand Up @@ -293,7 +298,7 @@ namespace NKikimr::NBsController {
const auto it = LocalityFactor.find(groupId);
return it != LocalityFactor.end() ? it->second : 0;
}
};
};

struct TAllocator : public TDiskManager {

Expand All @@ -303,7 +308,7 @@ namespace NKikimr::NBsController {
{
}

bool FillInGroup(ui32 maxScore, TUndoLog& undo, TGroup& group, const TGroupConstraints& constraints) {
bool FillInGroup(double maxScore, TUndoLog& undo, TGroup& group, const TGroupConstraints& constraints) {
// determine PDisks that fit our requirements (including score)
auto v = SetupMatchingDisks(maxScore);

Expand Down Expand Up @@ -575,7 +580,7 @@ namespace NKikimr::NBsController {
}

bool SetupNavigation(const TGroup& group) {
TPDiskByPosition matchingDisks = SetupMatchingDisks(::Max<ui32>());
TPDiskByPosition matchingDisks = SetupMatchingDisks(::Max<double>());
const ui32 totalFailRealmsNum = Topology.GetTotalFailRealmsNum();
const ui32 numFailDomainsPerFailRealm = Topology.GetNumFailDomainsPerFailRealm();
const ui32 numDisksPerFailRealm = numFailDomainsPerFailRealm * Topology.GetNumVDisksPerFailDomain();
Expand Down Expand Up @@ -653,7 +658,7 @@ namespace NKikimr::NBsController {
if (toMoveOut + freeDomains < toMoveIn) {
continue; // not enough free domains to place all the disks
}
if (newMovesRequired < movesRequired || (newMovesRequired == movesRequired &&
if (newMovesRequired < movesRequired || (newMovesRequired == movesRequired &&
freeDomains > pDomainsInPRealm[bestRealm].size())) {
bestRealm = pRealm;
movesRequired = newMovesRequired;
Expand Down Expand Up @@ -684,7 +689,7 @@ namespace NKikimr::NBsController {
}
}

void SetupCandidates(ui32 maxScore) {
void SetupCandidates(double maxScore) {
TPDiskByPosition matchingDisks = SetupMatchingDisks(maxScore);
DomainCandidates.clear();
DiskCandidates.clear();
Expand Down Expand Up @@ -738,7 +743,7 @@ namespace NKikimr::NBsController {
std::pair<TMisplacedVDisks::EFailLevel, std::vector<ui32>> FindMisplacedVDisks(const TGroup& group) {
using EFailLevel = TMisplacedVDisks::EFailLevel;
std::unordered_map<ui32, std::unordered_set<ui32>> usedPDomains; // pRealm -> { pDomain1, pDomain2, ... }
std::set<TPDiskId> usedPDisks;
std::set<TPDiskId> usedPDisks;
// {pRealm, pDomain} -> { pdisk1, pdisk2, ... }

EFailLevel failLevel = EFailLevel::ALL_OK;
Expand Down Expand Up @@ -806,7 +811,7 @@ namespace NKikimr::NBsController {
return {failLevel, misplacedVDisks};
}

std::optional<TPDiskId> TargetMisplacedVDisk(ui32 maxScore, const TGroup& group, const TVDiskIdShort& vdisk) {
std::optional<TPDiskId> TargetMisplacedVDisk(double maxScore, const TGroup& group, const TVDiskIdShort& vdisk) {
for (ui32 orderNumber = 0; orderNumber < group.size(); ++orderNumber) {
if (!group[orderNumber] && orderNumber != Topology.GetOrderNumber(vdisk)) {
return std::nullopt;
Expand All @@ -821,11 +826,11 @@ namespace NKikimr::NBsController {

const auto& domainCandidates = DomainCandidates[pRealm];
TPDomainCandidatesRange pDomainRange = { domainCandidates.begin(), domainCandidates.end() };

for (; pDomainRange.first != pDomainRange.second;) {
ui32 pDomain = *pDomainRange.first++;
const auto& diskCandidates = DiskCandidates[pRealm][pDomain];

if (!diskCandidates.empty()) {
return (*diskCandidates.begin())->PDiskId;
}
Expand Down Expand Up @@ -966,7 +971,7 @@ namespace NKikimr::NBsController {
}

// calculate score table
std::vector<ui32> scores;
std::vector<double> scores;
for (const auto& [pdiskId, pdisk] : PDisks) {
if (allocator.DiskIsUsable(pdisk)) {
scores.push_back(pdisk.GetPickerScore());
Expand Down Expand Up @@ -1033,7 +1038,7 @@ namespace NKikimr::NBsController {
return TMisplacedVDisks(EFailLevel::INCORRECT_LAYOUT, {}, "Cannot map failRealms to pRealms");
}

sanitizer.SetupCandidates(::Max<ui32>());
sanitizer.SetupCandidates(::Max<double>());
auto [failLevel, misplacedVDiskNums] = sanitizer.FindMisplacedVDisks(group);
std::vector<TVDiskIdShort> misplacedVDisks;
for (ui32 orderNum : misplacedVDiskNums) {
Expand All @@ -1042,7 +1047,7 @@ namespace NKikimr::NBsController {
return TMisplacedVDisks(failLevel, misplacedVDisks);
}

std::optional<TPDiskId> TargetMisplacedVDisk(ui32 groupId, TGroupDefinition& groupDefinition, TVDiskIdShort vdisk,
std::optional<TPDiskId> TargetMisplacedVDisk(ui32 groupId, TGroupDefinition& groupDefinition, TVDiskIdShort vdisk,
TForbiddenPDisks forbid, i64 requiredSpace, bool requireOperational, TString& error) {
if (Dirty) {
std::sort(PDiskByPosition.begin(), PDiskByPosition.end());
Expand All @@ -1067,7 +1072,7 @@ namespace NKikimr::NBsController {
}

// calculate score table
std::vector<ui32> scores;
std::vector<double> scores;
for (const auto& [pdiskId, pdisk] : PDisks) {
if (sanitizer.DiskIsUsable(pdisk)) {
scores.push_back(pdisk.GetPickerScore());
Expand All @@ -1077,7 +1082,7 @@ namespace NKikimr::NBsController {
scores.erase(std::unique(scores.begin(), scores.end()), scores.end());

// bisect scores to find optimal working one
sanitizer.SetupCandidates(::Max<ui32>());
sanitizer.SetupCandidates(::Max<double>());

std::optional<TPDiskId> result;

Expand Down Expand Up @@ -1152,7 +1157,7 @@ namespace NKikimr::NBsController {
return Impl->FindMisplacedVDisks(group);
}

std::optional<TPDiskId> TGroupMapper::TargetMisplacedVDisk(TGroupId groupId, TGroupMapper::TGroupDefinition& group,
std::optional<TPDiskId> TGroupMapper::TargetMisplacedVDisk(TGroupId groupId, TGroupMapper::TGroupDefinition& group,
TVDiskIdShort vdisk, TForbiddenPDisks forbid, i64 requiredSpace, bool requireOperational, TString& error) {
return Impl->TargetMisplacedVDisk(groupId.GetRawId(), group, vdisk, std::move(forbid), requiredSpace, requireOperational, error);
}
Expand Down
37 changes: 32 additions & 5 deletions ydb/core/mind/bscontroller/group_mapper_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ class TTestContext {

ESanitizeResult status = ESanitizeResult::ALREADY;
TString error;

if (!result.Disks.empty()) {
status = ESanitizeResult::FAIL;
for (auto vdisk : result.Disks) {
Expand Down Expand Up @@ -434,7 +434,7 @@ class TTestContext {
}

void PopulateGroupMapper(TGroupMapper& mapper, ui32 maxSlots = 16, TSet<TPDiskId> unusableDisks = {},
TSet<TPDiskId> nonoperationalDisks = {}, std::optional<ui32> decommittedDataCenter = std::nullopt) {
TSet<TPDiskId> nonoperationalDisks = {}, std::optional<ui32> decommittedDataCenter = std::nullopt, bool equalSlots = true) {
std::map<TPDiskId, std::vector<ui32>> groupDisks;
for (const auto& [groupId, group] : Groups) {
for (TPDiskId pdiskId : group.PDisks) {
Expand All @@ -443,12 +443,13 @@ class TTestContext {
}
for (const auto& pair : PDisks) {
auto& g = groupDisks[pair.first];
const auto& location = pair.second.GetLocation().GetLegacyValue();
mapper.RegisterPDisk({
.PDiskId = pair.first,
.Location = pair.second.GetLocation(),
.Usable = !unusableDisks.count(pair.first),
.NumSlots = pair.second.NumSlots,
.MaxSlots = maxSlots,
.MaxSlots = equalSlots || location.Rack < 8 ? maxSlots : 2 * maxSlots,
.Groups{g.begin(), g.end()},
.SpaceAvailable = 0,
.Operational = !nonoperationalDisks.contains(pair.first),
Expand Down Expand Up @@ -637,6 +638,32 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) {
}
}

Y_UNIT_TEST(NonUniformClusterDifferentSlotsPerDisk) {
    // Twelve disk descriptors that differ only in the rack field (racks 0..11), in shuffled order.
    constexpr ui32 rackCount = 12;
    constexpr ui32 groupCount = 16;

    std::vector<std::tuple<ui32, ui32, ui32, ui32, ui32>> disks;
    for (ui32 rack = 0; rack != rackCount; ++rack) {
        disks.emplace_back(1, 1, rack, 1, 1);
    }
    std::random_shuffle(disks.begin(), disks.end());

    TTestContext context(disks);
    UNIT_ASSERT_VALUES_EQUAL((8 + 4), context.GetTotalDisks());

    // equalSlots = false: the base limit is 8 slots per disk, and PopulateGroupMapper registers
    // disks in racks 8 and above with twice that limit.
    TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::Erasure4Plus2Block));
    context.PopulateGroupMapper(mapper, 8, {}, {}, std::nullopt, false);

    // Every allocation has to succeed and pass the erasure layout check.
    for (ui32 groupIdx = 0; groupIdx < groupCount; ++groupIdx) {
        Ctest << groupIdx << "/" << 16 << Endl;
        TGroupMapper::TGroupDefinition group;
        context.AllocateGroup(mapper, group);
        context.CheckGroupErasure(group);
    }

    // The per-disk counts reported by GetSlots() must add up to the full capacity of the
    // cluster: 8 disks * 8 slots + 4 disks * 16 slots.
    ui64 totalSlots = 0;
    const TVector<ui32> slots = context.GetSlots();
    for (const ui32 used : slots) {
        totalSlots += used;
        Ctest << "slots " << used << " ";
    }
    Ctest << totalSlots << Endl;
    UNIT_ASSERT_VALUES_EQUAL(totalSlots, 8 * 8 + 4 * 16);
}

Y_UNIT_TEST(NonUniformCluster2) {
std::vector<std::tuple<ui32, ui32, ui32, ui32, ui32>> disks;
for (ui32 rack = 0, body = 0; rack < 12; ++rack) {
Expand Down Expand Up @@ -1012,9 +1039,9 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) {

Ctest << "group after layout shuffling:" << Endl;
context.DumpGroup(groupDef);

ui32 sanitationStep = 0;

TGroupMapper::TGroupDefinition group = groupDef;
TString path = "";
TSet<TGroupMapper::TGroupDefinition> seen;
Expand Down

0 comments on commit 012816b

Please sign in to comment.