Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consider free slots count in BSC during group allocation #11008

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions ydb/core/mind/bscontroller/group_layout_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,12 +201,15 @@ namespace NKikimr::NBsController {
const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNumber);
const ui32 domainIdx = Topology.GetFailDomainOrderNumber(vdisk);

const auto& disksPerRealm = NumDisksPerRealm[vdisk.FailRealm][pos.Realm];
const auto& disksPerDomain = NumDisksPerDomain[domainIdx][pos.Domain];

return {
.RealmInterlace = NumDisksPerRealmTotal[pos.Realm] - NumDisksPerRealm[vdisk.FailRealm][pos.Realm],
.DomainInterlace = NumDisksPerDomainTotal[pos.Domain] - NumDisksPerDomain[domainIdx][pos.Domain],
.RealmInterlace = NumDisksPerRealmTotal[pos.Realm] - disksPerRealm,
.DomainInterlace = NumDisksPerDomainTotal[pos.Domain] - disksPerDomain,
.RealmGroupScatter = NumDisks - NumDisksPerRealmGroup[pos.RealmGroup],
.RealmScatter = NumDisksInRealm[vdisk.FailRealm] - NumDisksPerRealm[vdisk.FailRealm][pos.Realm],
.DomainScatter = NumDisksInDomain[domainIdx] - NumDisksPerDomain[domainIdx][pos.Domain],
.RealmScatter = NumDisksInRealm[vdisk.FailRealm] - disksPerRealm,
.DomainScatter = NumDisksInDomain[domainIdx] - disksPerDomain,
};
}

Expand Down
49 changes: 27 additions & 22 deletions ydb/core/mind/bscontroller/group_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,13 @@ namespace NKikimr::NBsController {
}
}

ui32 GetPickerScore() const {
return NumSlots;
// Returns how many more slots this disk can take before reaching MaxSlots.
// The value is signed on purpose: it goes negative when NumSlots is already
// above MaxSlots.
i32 FreeSlots() const {
    const i32 remaining = static_cast<i32>(MaxSlots) - NumSlots;
    return remaining;
}

// Score used when picking disks: the fraction of MaxSlots that is already
// occupied (NumSlots / MaxSlots), so disks with different slot limits are
// compared by relative rather than absolute occupancy.
// NOTE(review): a MaxSlots of 0 would make this a floating-point division by
// zero -- confirm that such disks are never scored.
double GetPickerScore() const {
    const double occupied = static_cast<double>(NumSlots);
    return occupied / MaxSlots;
}
};

Expand All @@ -65,7 +70,7 @@ namespace NKikimr::NBsController {

using TPDomainCandidatesRange = std::pair<std::vector<ui32>::const_iterator, std::vector<ui32>::const_iterator>;
using TPDiskCandidatesRange = std::pair<std::vector<TPDiskInfo*>::const_iterator, std::vector<TPDiskInfo*>::const_iterator>;

struct TDiskManager {
TImpl& Self;
const TBlobStorageGroupInfo::TTopology Topology;
Expand Down Expand Up @@ -126,7 +131,7 @@ namespace NKikimr::NBsController {

return res;
}

TGroupConstraints ProcessGroupConstraints(const TGroupConstraintsDefinition& groupConstraints) {
TGroupConstraints res(Topology.GetTotalVDisksNum());
Traverse(groupConstraints, [&](TVDiskIdShort vdisk, TTargetDiskConstraints diskConstraints) {
Expand Down Expand Up @@ -159,7 +164,7 @@ namespace NKikimr::NBsController {
return true;
}

TPDiskByPosition SetupMatchingDisks(ui32 maxScore) {
TPDiskByPosition SetupMatchingDisks(double maxScore) {
TPDiskByPosition res;
res.reserve(Self.PDiskByPosition.size());

Expand Down Expand Up @@ -245,8 +250,8 @@ namespace NKikimr::NBsController {
}

bool DiskIsBetter(const TPDiskInfo& pretender, const TPDiskInfo& king) const {
if (pretender.NumSlots != king.NumSlots) {
return pretender.NumSlots < king.NumSlots;
if (pretender.FreeSlots() != king.FreeSlots()) {
return pretender.FreeSlots() > king.FreeSlots();
} else if (GivesLocalityBoost(pretender, king) || BetterQuotaMatch(pretender, king)) {
return true;
} else {
Expand Down Expand Up @@ -293,7 +298,7 @@ namespace NKikimr::NBsController {
const auto it = LocalityFactor.find(groupId);
return it != LocalityFactor.end() ? it->second : 0;
}
};
};

struct TAllocator : public TDiskManager {

Expand All @@ -303,7 +308,7 @@ namespace NKikimr::NBsController {
{
}

bool FillInGroup(ui32 maxScore, TUndoLog& undo, TGroup& group, const TGroupConstraints& constraints) {
bool FillInGroup(double maxScore, TUndoLog& undo, TGroup& group, const TGroupConstraints& constraints) {
// determine PDisks that fit our requirements (including score)
auto v = SetupMatchingDisks(maxScore);

Expand Down Expand Up @@ -575,7 +580,7 @@ namespace NKikimr::NBsController {
}

bool SetupNavigation(const TGroup& group) {
TPDiskByPosition matchingDisks = SetupMatchingDisks(::Max<ui32>());
TPDiskByPosition matchingDisks = SetupMatchingDisks(::Max<double>());
const ui32 totalFailRealmsNum = Topology.GetTotalFailRealmsNum();
const ui32 numFailDomainsPerFailRealm = Topology.GetNumFailDomainsPerFailRealm();
const ui32 numDisksPerFailRealm = numFailDomainsPerFailRealm * Topology.GetNumVDisksPerFailDomain();
Expand Down Expand Up @@ -653,7 +658,7 @@ namespace NKikimr::NBsController {
if (toMoveOut + freeDomains < toMoveIn) {
continue; // not enough free domains to place all the disks
}
if (newMovesRequired < movesRequired || (newMovesRequired == movesRequired &&
if (newMovesRequired < movesRequired || (newMovesRequired == movesRequired &&
freeDomains > pDomainsInPRealm[bestRealm].size())) {
bestRealm = pRealm;
movesRequired = newMovesRequired;
Expand Down Expand Up @@ -684,7 +689,7 @@ namespace NKikimr::NBsController {
}
}

void SetupCandidates(ui32 maxScore) {
void SetupCandidates(double maxScore) {
TPDiskByPosition matchingDisks = SetupMatchingDisks(maxScore);
DomainCandidates.clear();
DiskCandidates.clear();
Expand Down Expand Up @@ -738,7 +743,7 @@ namespace NKikimr::NBsController {
std::pair<TMisplacedVDisks::EFailLevel, std::vector<ui32>> FindMisplacedVDisks(const TGroup& group) {
using EFailLevel = TMisplacedVDisks::EFailLevel;
std::unordered_map<ui32, std::unordered_set<ui32>> usedPDomains; // pRealm -> { pDomain1, pDomain2, ... }
std::set<TPDiskId> usedPDisks;
std::set<TPDiskId> usedPDisks;
// {pRealm, pDomain} -> { pdisk1, pdisk2, ... }

EFailLevel failLevel = EFailLevel::ALL_OK;
Expand Down Expand Up @@ -806,7 +811,7 @@ namespace NKikimr::NBsController {
return {failLevel, misplacedVDisks};
}

std::optional<TPDiskId> TargetMisplacedVDisk(ui32 maxScore, const TGroup& group, const TVDiskIdShort& vdisk) {
std::optional<TPDiskId> TargetMisplacedVDisk(double maxScore, const TGroup& group, const TVDiskIdShort& vdisk) {
for (ui32 orderNumber = 0; orderNumber < group.size(); ++orderNumber) {
if (!group[orderNumber] && orderNumber != Topology.GetOrderNumber(vdisk)) {
return std::nullopt;
Expand All @@ -821,11 +826,11 @@ namespace NKikimr::NBsController {

const auto& domainCandidates = DomainCandidates[pRealm];
TPDomainCandidatesRange pDomainRange = { domainCandidates.begin(), domainCandidates.end() };

for (; pDomainRange.first != pDomainRange.second;) {
ui32 pDomain = *pDomainRange.first++;
const auto& diskCandidates = DiskCandidates[pRealm][pDomain];

if (!diskCandidates.empty()) {
return (*diskCandidates.begin())->PDiskId;
}
Expand Down Expand Up @@ -966,7 +971,7 @@ namespace NKikimr::NBsController {
}

// calculate score table
std::vector<ui32> scores;
std::vector<double> scores;
for (const auto& [pdiskId, pdisk] : PDisks) {
if (allocator.DiskIsUsable(pdisk)) {
scores.push_back(pdisk.GetPickerScore());
Expand Down Expand Up @@ -1033,7 +1038,7 @@ namespace NKikimr::NBsController {
return TMisplacedVDisks(EFailLevel::INCORRECT_LAYOUT, {}, "Cannot map failRealms to pRealms");
}

sanitizer.SetupCandidates(::Max<ui32>());
sanitizer.SetupCandidates(::Max<double>());
auto [failLevel, misplacedVDiskNums] = sanitizer.FindMisplacedVDisks(group);
std::vector<TVDiskIdShort> misplacedVDisks;
for (ui32 orderNum : misplacedVDiskNums) {
Expand All @@ -1042,7 +1047,7 @@ namespace NKikimr::NBsController {
return TMisplacedVDisks(failLevel, misplacedVDisks);
}

std::optional<TPDiskId> TargetMisplacedVDisk(ui32 groupId, TGroupDefinition& groupDefinition, TVDiskIdShort vdisk,
std::optional<TPDiskId> TargetMisplacedVDisk(ui32 groupId, TGroupDefinition& groupDefinition, TVDiskIdShort vdisk,
TForbiddenPDisks forbid, i64 requiredSpace, bool requireOperational, TString& error) {
if (Dirty) {
std::sort(PDiskByPosition.begin(), PDiskByPosition.end());
Expand All @@ -1067,7 +1072,7 @@ namespace NKikimr::NBsController {
}

// calculate score table
std::vector<ui32> scores;
std::vector<double> scores;
for (const auto& [pdiskId, pdisk] : PDisks) {
if (sanitizer.DiskIsUsable(pdisk)) {
scores.push_back(pdisk.GetPickerScore());
Expand All @@ -1077,7 +1082,7 @@ namespace NKikimr::NBsController {
scores.erase(std::unique(scores.begin(), scores.end()), scores.end());

// bisect scores to find optimal working one
sanitizer.SetupCandidates(::Max<ui32>());
sanitizer.SetupCandidates(::Max<double>());

std::optional<TPDiskId> result;

Expand Down Expand Up @@ -1152,7 +1157,7 @@ namespace NKikimr::NBsController {
return Impl->FindMisplacedVDisks(group);
}

std::optional<TPDiskId> TGroupMapper::TargetMisplacedVDisk(TGroupId groupId, TGroupMapper::TGroupDefinition& group,
std::optional<TPDiskId> TGroupMapper::TargetMisplacedVDisk(TGroupId groupId, TGroupMapper::TGroupDefinition& group,
TVDiskIdShort vdisk, TForbiddenPDisks forbid, i64 requiredSpace, bool requireOperational, TString& error) {
return Impl->TargetMisplacedVDisk(groupId.GetRawId(), group, vdisk, std::move(forbid), requiredSpace, requireOperational, error);
}
Expand Down
37 changes: 32 additions & 5 deletions ydb/core/mind/bscontroller/group_mapper_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ class TTestContext {

ESanitizeResult status = ESanitizeResult::ALREADY;
TString error;

if (!result.Disks.empty()) {
status = ESanitizeResult::FAIL;
for (auto vdisk : result.Disks) {
Expand Down Expand Up @@ -434,7 +434,7 @@ class TTestContext {
}

void PopulateGroupMapper(TGroupMapper& mapper, ui32 maxSlots = 16, TSet<TPDiskId> unusableDisks = {},
TSet<TPDiskId> nonoperationalDisks = {}, std::optional<ui32> decommittedDataCenter = std::nullopt) {
TSet<TPDiskId> nonoperationalDisks = {}, std::optional<ui32> decommittedDataCenter = std::nullopt, bool equalSlots = true) {
std::map<TPDiskId, std::vector<ui32>> groupDisks;
for (const auto& [groupId, group] : Groups) {
for (TPDiskId pdiskId : group.PDisks) {
Expand All @@ -443,12 +443,13 @@ class TTestContext {
}
for (const auto& pair : PDisks) {
auto& g = groupDisks[pair.first];
const auto& location = pair.second.GetLocation().GetLegacyValue();
mapper.RegisterPDisk({
.PDiskId = pair.first,
.Location = pair.second.GetLocation(),
.Usable = !unusableDisks.count(pair.first),
.NumSlots = pair.second.NumSlots,
.MaxSlots = maxSlots,
.MaxSlots = equalSlots || location.Rack < 8 ? maxSlots : 2 * maxSlots,
.Groups{g.begin(), g.end()},
.SpaceAvailable = 0,
.Operational = !nonoperationalDisks.contains(pair.first),
Expand Down Expand Up @@ -637,6 +638,32 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) {
}
}

// Allocates 16 Erasure4Plus2Block groups on a 12-rack layout where the mapper
// is populated with equalSlots = false: disks in racks 0..7 are registered with
// a limit of 8 slots and the remaining racks with twice that (see
// PopulateGroupMapper). The test then checks that the total number of occupied
// slots equals 8 * 8 + 4 * 16.
Y_UNIT_TEST(NonUniformClusterDifferentSlotsPerDisk) {
    constexpr ui32 rackCount = 12;
    constexpr ui32 groupCount = 16;

    // One tuple per rack; only the rack field varies between entries.
    std::vector<std::tuple<ui32, ui32, ui32, ui32, ui32>> disks;
    for (ui32 rackIdx = 0; rackIdx != rackCount; ++rackIdx) {
        disks.emplace_back(1, 1, rackIdx, 1, 1);
    }
    // NOTE(review): std::random_shuffle was removed from the standard in C++17;
    // kept as is here -- confirm the toolchain still provides it.
    std::random_shuffle(disks.begin(), disks.end());

    TTestContext context(disks);
    UNIT_ASSERT_VALUES_EQUAL((8 + 4), context.GetTotalDisks());

    TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::Erasure4Plus2Block));
    context.PopulateGroupMapper(mapper, /*maxSlots=*/8, /*unusableDisks=*/{}, /*nonoperationalDisks=*/{},
        /*decommittedDataCenter=*/std::nullopt, /*equalSlots=*/false);

    // Every allocation must succeed and satisfy the erasure layout check.
    for (ui32 groupIdx = 0; groupIdx != groupCount; ++groupIdx) {
        Ctest << groupIdx << "/" << groupCount << Endl;
        TGroupMapper::TGroupDefinition allocated;
        context.AllocateGroup(mapper, allocated);
        context.CheckGroupErasure(allocated);
    }

    // Dump per-disk slot usage and accumulate the total for the final check.
    const TVector<ui32> slotsPerDisk = context.GetSlots();
    ui64 slots_total = 0;
    for (const ui32 used : slotsPerDisk) {
        Ctest << "slots " << used << " ";
        slots_total += used;
    }
    Ctest << slots_total << Endl;
    UNIT_ASSERT_VALUES_EQUAL(slots_total, 8 * 8 + 4 * 16);
}

Y_UNIT_TEST(NonUniformCluster2) {
std::vector<std::tuple<ui32, ui32, ui32, ui32, ui32>> disks;
for (ui32 rack = 0, body = 0; rack < 12; ++rack) {
Expand Down Expand Up @@ -1012,9 +1039,9 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) {

Ctest << "group after layout shuffling:" << Endl;
context.DumpGroup(groupDef);

ui32 sanitationStep = 0;

TGroupMapper::TGroupDefinition group = groupDef;
TString path = "";
TSet<TGroupMapper::TGroupDefinition> seen;
Expand Down
Loading