
Commit

Merge 02a2c61 into 15d1fa8
kunga authored Feb 1, 2024
2 parents 15d1fa8 + 02a2c61 commit 42d9c22
Showing 2 changed files with 258 additions and 44 deletions.
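
This change adds bytesLimit handling to the B-tree index precharge (TChargeBTreeIndex), alongside the existing itemsLimit: B-tree children carry cumulative RowCount and DataSize, so the bytes charged so far can be computed as a difference of two cumulative DataSize values and compared against the limit. The standalone sketch below illustrates only that idea; SketchChild, LimitExceeded and ChargeUntilBytesLimit are simplified assumptions for illustration, not the YDB API.

// Illustrative sketch of the cumulative-DataSize bytes limit (assumed semantics, not the YDB API).
#include <cstdint>
#include <cstddef>
#include <vector>

struct SketchChild {
    uint64_t RowCount; // cumulative rows up to and including this child
    uint64_t DataSize; // cumulative bytes up to and including this child
};

// Assumed helper: a limit of 0 means "no limit".
bool LimitExceeded(uint64_t value, uint64_t limit) {
    return limit && value > limit;
}

// How many children of one node fit before bytesLimit is hit, mirroring the
// child->DataSize - firstChild->DataSize check added in the diff below.
size_t ChargeUntilBytesLimit(const std::vector<SketchChild>& children, uint64_t bytesLimit) {
    for (size_t pos = 0; pos < children.size(); ++pos) {
        uint64_t bytes = children[pos].DataSize - children.front().DataSize;
        if (LimitExceeded(bytes, bytesLimit)) {
            return pos; // children[pos] would overshoot; it is not charged
        }
    }
    return children.size();
}

In the reverse-iteration path the diff applies the same comparison in the other direction, as prevLastChild->DataSize - child->DataSize.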
205 changes: 180 additions & 25 deletions ydb/core/tablet_flat/flat_part_charge_btree_index.h
@@ -12,6 +12,7 @@ class TChargeBTreeIndex : public ICharge {
using TRecIdx = NPage::TRecIdx;
using TGroupId = NPage::TGroupId;
using TChild = TBtreeIndexNode::TChild;
using TShortChild = TBtreeIndexNode::TShortChild;

struct TChildState {
TPageId PageId;
@@ -69,6 +70,7 @@ class TChargeBTreeIndex : public ICharge {
bool ready = true, overshot = true;
bool chargeGroups = bool(Groups); // false value means that beginRowId, endRowId are invalid and shouldn't be used
ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups
TRowId beginBytesLimitRowId = Max<TRowId>();

const auto& meta = Part->IndexPages.BTreeGroups[0];
Y_ABORT_UNLESS(endRowId <= meta.RowCount);
@@ -82,6 +84,7 @@ class TChargeBTreeIndex : public ICharge {
TVector<TNodeState> level, nextLevel(::Reserve(3));
TPageId key1PageId = key1 ? meta.PageId : Max<TPageId>();
TPageId key2PageId = key2 ? meta.PageId : Max<TPageId>();
ui64 key1Items = 0, key1Bytes = 0, prevKey1Items = 0, prevKey1Bytes = 0;

const auto iterateLevel = [&](const auto& tryHandleChild) {
// tryHandleChild may update them, copy for simplicity
@@ -119,6 +122,13 @@
return;
}
}
if (bytesLimit) {
ui64 bytes = child->DataSize - firstChild->DataSize;
if (LimitExceeded(bytes, bytesLimit)) {
overshot = false;
return;
}
}
}
}
}
@@ -128,14 +138,16 @@
const auto skipUnloadedRows = [&](const TChildState& child) {
if (child.PageId == key1PageId) {
if (chargeGroups && chargeGroupsItemsLimit) {
// TODO: use erased count
ui64 unloadedItems = child.EndRowId - child.BeginRowId;
ui64 unloadedItems = key1Items - prevKey1Items;
if (unloadedItems < chargeGroupsItemsLimit) {
chargeGroupsItemsLimit -= unloadedItems;
} else {
chargeGroups = false;
}
}
if (chargeGroups && bytesLimit) {
beginBytesLimitRowId = Max(beginRowId, child.BeginRowId);
}
beginRowId = Max(beginRowId, child.EndRowId);
}
if (child.PageId == key2PageId) {
@@ -149,14 +161,20 @@
const auto& node = nextLevel.back();
if (child.PageId == key1PageId) {
TRecIdx pos = node.Seek(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
key1PageId = node.GetShortChild(pos).PageId;
auto& key1Child = node.GetChild(pos);
key1PageId = key1Child.PageId;
key1Items = key1Child.GetNonErasedRowCount();
key1Bytes = key1Child.DataSize;
if (pos) {
beginRowId = Max(beginRowId, node.GetShortChild(pos - 1).RowCount); // move beginRowId to the first key >= key1
auto& prevKey1Child = node.GetChild(pos - 1);
prevKey1Items = prevKey1Child.GetNonErasedRowCount();
prevKey1Bytes = prevKey1Child.DataSize;
beginRowId = Max(beginRowId, prevKey1Child.RowCount); // move beginRowId to the first key >= key1
}
}
if (child.PageId == key2PageId) {
TRecIdx pos = node.Seek(ESeek::Lower, key2, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
auto& key2Child = node.GetShortChild(pos);
auto& key2Child = node.GetChild(pos);
key2PageId = key2Child.PageId;
endRowId = Min(endRowId, key2Child.RowCount + 1); // move endRowId - 1 to the first key > key2
if (key2Child.RowCount <= beginRowId) {
@@ -207,7 +225,7 @@ class TChargeBTreeIndex : public ICharge {
}

if (!ready) { // some index pages are missing, do not continue
ready &= DoPrechargeGroups(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
ready &= DoGroups(chargeGroups, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
return {ready, false};
}

Expand All @@ -221,7 +239,7 @@ class TChargeBTreeIndex : public ICharge {
iterateLevel(tryHandleDataPage);
}

ready &= DoPrechargeGroups(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
ready &= DoGroups(chargeGroups, beginRowId, endRowId, beginBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds

return {ready, overshot};
}
@@ -234,6 +252,7 @@ class TChargeBTreeIndex : public ICharge {
bool ready = true, overshot = true;
bool chargeGroups = bool(Groups); // false value means that beginRowId, endRowId are invalid and shouldn't be used
ui64 chargeGroupsItemsLimit = itemsLimit; // pessimistic items limit for groups
TRowId endBytesLimitRowId = Max<TRowId>();

const auto& meta = Part->IndexPages.BTreeGroups[0];
Y_ABORT_UNLESS(endRowId <= meta.RowCount);
@@ -248,6 +267,7 @@ class TChargeBTreeIndex : public ICharge {
TVector<TNodeState> level, nextLevel(::Reserve(3));
TPageId key1PageId = key1 ? meta.PageId : Max<TPageId>();
TPageId key2PageId = key2 ? meta.PageId : Max<TPageId>();
ui64 prevKey1Items = 0, prevKey1Bytes = 0, key1Items = 0, key1Bytes = 0;

const auto iterateLevel = [&](const auto& tryHandleChild) {
// tryHandleChild may update them, copy for simplicity
@@ -288,6 +308,13 @@
return;
}
}
if (bytesLimit) {
ui64 bytes = prevLastChild->DataSize - child->DataSize;
if (LimitExceeded(bytes, bytesLimit)) {
overshot = false;
return;
}
}
}
}
ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId));
@@ -298,14 +325,16 @@
const auto skipUnloadedRows = [&](const TChildState& child) {
if (child.PageId == key1PageId) {
if (chargeGroups && chargeGroupsItemsLimit) {
// TODO: use erased count
ui64 unloadedItems = child.EndRowId - child.BeginRowId;
ui64 unloadedItems = key1Items - prevKey1Items;
if (unloadedItems < chargeGroupsItemsLimit) {
chargeGroupsItemsLimit -= unloadedItems;
} else {
chargeGroups = false;
}
}
if (chargeGroups && bytesLimit) {
endBytesLimitRowId = Min(endRowId, child.EndRowId);
}
endRowId = Min(endRowId, child.BeginRowId);
}
if (child.PageId == key2PageId) {
@@ -319,15 +348,22 @@
const auto& node = nextLevel.back();
if (child.PageId == key1PageId) {
TRecIdx pos = node.SeekReverse(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
auto& key1Child = node.GetShortChild(pos);
auto& key1Child = node.GetChild(pos);
key1PageId = key1Child.PageId;
key1Items = key1Child.GetNonErasedRowCount();
key1Bytes = key1Child.DataSize;
if (pos) {
auto& prevKey1Child = node.GetChild(pos - 1);
prevKey1Items = prevKey1Child.GetNonErasedRowCount();
prevKey1Bytes = prevKey1Child.DataSize;
}
endRowId = Min(endRowId, key1Child.RowCount); // move endRowId - 1 to the last key <= key1
}
if (child.PageId == key2PageId) {
TRecIdx pos = node.Seek(ESeek::Lower, key2, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
key2PageId = node.GetShortChild(pos).PageId;
key2PageId = node.GetChild(pos).PageId;
if (pos) {
auto& prevKey2Child = node.GetShortChild(pos - 1);
auto& prevKey2Child = node.GetChild(pos - 1);
beginRowId = Max(beginRowId, prevKey2Child.RowCount - 1); // move beginRowId to the last key < key2
if (prevKey2Child.RowCount >= endRowId) {
chargeGroups = false; // key2 is after current slice
@@ -388,7 +424,7 @@ class TChargeBTreeIndex : public ICharge {
}

if (!ready) { // some index pages are missing, do not continue
ready &= DoPrechargeGroupsReverse(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
ready &= DoGroupsReverse(chargeGroups, beginRowId, endRowId, endBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
return {ready, false};
}

Expand All @@ -402,69 +438,85 @@ class TChargeBTreeIndex : public ICharge {
iterateLevel(tryHandleDataPage);
}

ready &= DoPrechargeGroupsReverse(chargeGroups, beginRowId, endRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds
ready &= DoGroupsReverse(chargeGroups, beginRowId, endRowId, endBytesLimitRowId, chargeGroupsItemsLimit, bytesLimit); // precharge groups using the latest row bounds

return {ready, overshot};
}

private:
bool DoPrechargeGroups(bool chargeGroups, TRowId beginRowId, TRowId endRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept {
bool DoGroups(bool chargeGroups, TRowId beginRowId, TRowId endRowId, TRowId beginBytesLimitRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept {
bool ready = true;

if (chargeGroups && beginRowId < endRowId) {
if (itemsLimit && endRowId - beginRowId - 1 >= itemsLimit) {
endRowId = beginRowId + itemsLimit + 1;
}
if (beginBytesLimitRowId == Max<TRowId>()) {
beginBytesLimitRowId = beginRowId;
}

for (auto groupId : Groups) {
ready &= DoPrechargeGroup(groupId, beginRowId, endRowId, bytesLimit);
ready &= DoGroup(groupId, beginRowId, endRowId, beginBytesLimitRowId, bytesLimit);
}
}

return ready;
}

bool DoPrechargeGroupsReverse(bool chargeGroups, TRowId beginRowId, TRowId endRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept {
bool DoGroupsReverse(bool chargeGroups, TRowId beginRowId, TRowId endRowId, TRowId endBytesLimitRowId, ui64 itemsLimit, ui64 bytesLimit) const noexcept {
bool ready = true;

if (chargeGroups && beginRowId < endRowId) {
if (itemsLimit && endRowId - beginRowId - 1 >= itemsLimit) {
beginRowId = endRowId - itemsLimit - 1;
}
if (endBytesLimitRowId == Max<TRowId>()) {
endBytesLimitRowId = endRowId;
}

for (auto groupId : Groups) {
ready &= DoPrechargeGroup(groupId, beginRowId, endRowId, bytesLimit);
ready &= DoGroupReverse(groupId, beginRowId, endRowId, endBytesLimitRowId, bytesLimit);
}
}

return ready;
}

private:
bool DoPrechargeGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, ui64 bytesLimit) const noexcept {
bool DoGroup(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId beginBytesLimitRowId, ui64 bytesLimit) const noexcept {
bool ready = true;

Y_UNUSED(bytesLimit);

const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index];

TVector<TNodeState> level, nextLevel(::Reserve(3));
ui64 prevBeginDataSize = 0;
ui64 prevBeginBytesLimitDataSize = bytesLimit ? GetPrevDataSize(meta, beginBytesLimitRowId) : 0;

const auto iterateLevel = [&](const auto& tryHandleChild) {
ui64 prevChildDataSize = prevBeginDataSize;
for (const auto &node : level) {
TRecIdx from = 0, to = node.GetChildrenCount();
if (node.BeginRowId < beginRowId) {
from = node.Seek(beginRowId);
if (from) {
prevChildDataSize = prevBeginDataSize = node.GetShortChild(from - 1).DataSize;
}
}
if (node.EndRowId > endRowId) {
to = node.Seek(endRowId - 1) + 1;
}
for (TRecIdx pos : xrange(from, to)) {
auto child = node.GetShortChild(pos);
auto child = node.GetShortChildRef(pos);
auto prevChild = pos ? node.GetShortChildRef(pos - 1) : nullptr;
TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId;
TRowId endRowId = child.RowCount;
ready &= tryHandleChild(TChildState(child.PageId, beginRowId, endRowId));
TRowId endRowId = child->RowCount;
if (bytesLimit) {
if (prevChildDataSize > prevBeginBytesLimitDataSize && LimitExceeded(prevChildDataSize - prevBeginBytesLimitDataSize, bytesLimit)) {
return;
}
}
ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId));
prevChildDataSize = child->DataSize;
}
}
};
@@ -500,6 +552,109 @@ class TChargeBTreeIndex : public ICharge {
return ready;
}

bool DoGroupReverse(TGroupId groupId, TRowId beginRowId, TRowId endRowId, TRowId endBytesLimitRowId, ui64 bytesLimit) const noexcept {
bool ready = true;

const auto& meta = groupId.IsHistoric() ? Part->IndexPages.BTreeHistoric[groupId.Index] : Part->IndexPages.BTreeGroups[groupId.Index];

// level's nodes are in reverse order
TVector<TNodeState> level, nextLevel(::Reserve(3));
ui64 endBytesLimitDataSize = bytesLimit ? GetDataSize(meta, endBytesLimitRowId - 1) : 0;

const auto iterateLevel = [&](const auto& tryHandleChild) {
for (const auto &node : level) {
TRecIdx from = 0, to = node.GetChildrenCount();
if (node.BeginRowId < beginRowId) {
from = node.Seek(beginRowId);
}
if (node.EndRowId > endRowId) {
to = node.Seek(endRowId - 1) + 1;
}
for (TRecIdx posExt = to; posExt > from; posExt--) {
auto child = node.GetShortChildRef(posExt - 1);
auto prevChild = posExt - 1 ? node.GetShortChildRef(posExt - 2) : nullptr;
TRowId beginRowId = prevChild ? prevChild->RowCount : node.BeginRowId;
TRowId endRowId = child->RowCount;
if (bytesLimit) {
if (endBytesLimitDataSize > child->DataSize && LimitExceeded(endBytesLimitDataSize - child->DataSize, bytesLimit)) {
return;
}
}
ready &= tryHandleChild(TChildState(child->PageId, beginRowId, endRowId));
}
}
};

const auto tryHandleNode = [&](TChildState child) -> bool {
return TryLoadNode(child, nextLevel);
};

const auto tryHandleDataPage = [&](TChildState child) -> bool {
return HasDataPage(child.PageId, groupId);
};

for (ui32 height = 0; height < meta.LevelCount && ready; height++) {
if (height == 0) {
ready &= tryHandleNode(TChildState(meta.PageId, 0, meta.RowCount));
} else {
iterateLevel(tryHandleNode);
}
level.swap(nextLevel);
nextLevel.clear();
}

if (!ready) { // some index pages are missing, do not continue
return ready;
}

if (meta.LevelCount == 0) {
ready &= tryHandleDataPage(TChildState(meta.PageId, 0, meta.RowCount));
} else {
iterateLevel(tryHandleDataPage);
}

return ready;
}

private:
ui64 GetPrevDataSize(const TBtreeIndexMeta& meta, TRowId rowId) const {
TPageId pageId = meta.PageId;
ui64 result = 0;

for (ui32 height = 0; height < meta.LevelCount; height++) {
auto page = Env->TryGetPage(Part, pageId);
if (!page) {
return result;
}
auto node = TBtreeIndexNode(*page);
auto pos = node.Seek(rowId);
pageId = node.GetShortChild(pos).PageId;
if (pos) {
result = node.GetShortChild(pos - 1).DataSize;
}
}

return result;
}

ui64 GetDataSize(TBtreeIndexMeta meta, TRowId rowId) const {
TPageId pageId = meta.PageId;
ui64 result = meta.DataSize;

for (ui32 height = 0; height < meta.LevelCount; height++) {
auto page = Env->TryGetPage(Part, pageId);
if (!page) {
return result;
}
auto node = TBtreeIndexNode(*page);
auto pos = node.Seek(rowId);
pageId = node.GetShortChild(pos).PageId;
result = node.GetShortChild(pos).DataSize;
}

return result;
}

private:
const TSharedData* TryGetDataPage(TPageId pageId, TGroupId groupId) const noexcept {
return Env->TryGetPage(Part, pageId, groupId);
@@ -509,7 +664,7 @@ class TChargeBTreeIndex : public ICharge {
return bool(Env->TryGetPage(Part, pageId, groupId));
}

bool TryLoadNode(TChildState& child, TVector<TNodeState>& level) const noexcept {
bool TryLoadNode(const TChildState& child, TVector<TNodeState>& level) const noexcept {
auto page = Env->TryGetPage(Part, child.PageId);
if (!page) {
return false;
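The new GetPrevDataSize and GetDataSize helpers above descend from the root index page, seeking the child that contains a given rowId at each level and reading cumulative DataSize values from the short children; GetPrevDataSize returns the cumulative size strictly before rowId's data page, while GetDataSize returns the cumulative size through it. A standalone sketch of that descent over a simplified in-memory tree follows; SketchNode, SketchRef and the linear seek are assumptions for illustration, not the TBtreeIndexNode API.

// Illustrative sketch of the GetPrevDataSize-style descent (assumed layout, not TBtreeIndexNode).
#include <cstdint>
#include <vector>

struct SketchNode;

struct SketchRef {
    uint64_t RowCount;       // cumulative rows up to and including this child
    uint64_t DataSize;       // cumulative bytes up to and including this child
    const SketchNode* Child; // nullptr when this child is a data page
};

struct SketchNode {
    std::vector<SketchRef> Children; // ordered by cumulative RowCount
};

// Cumulative data size of everything before the data page that contains rowId.
uint64_t PrevDataSize(const SketchNode* node, uint64_t rowId) {
    uint64_t result = 0;
    while (node && !node->Children.empty()) {
        // Seek: first child whose cumulative RowCount exceeds rowId (clamped to the last child).
        size_t pos = 0;
        while (pos + 1 < node->Children.size() && node->Children[pos].RowCount <= rowId) {
            ++pos;
        }
        if (pos) {
            result = node->Children[pos - 1].DataSize; // cumulative size before rowId's subtree
        }
        node = node->Children[pos].Child; // descend; becomes nullptr at a data page
    }
    return result;
}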