Skip to content

Commit

Permalink
KIKIMR-19521 BTreeIndex ShortChild
Browse files Browse the repository at this point in the history
Экономия ~26% для групп и ~16% для истории
  • Loading branch information
kunga committed Dec 1, 2023
1 parent e766ad5 commit ebe7a9b
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 56 deletions.
72 changes: 52 additions & 20 deletions ydb/core/tablet_flat/flat_page_btree_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ namespace NKikimr::NTable::NPage {
// Fixed-size header of a B-tree index node page (asserted elsewhere to be 9 bytes).
// NOTE(review): this is an unmarked diff rendering; the full-width `ui8 FixedKeySize;`
// appears to be the pre-change declaration that the two bit-fields below replace.
struct THeader {
// Number of keys in the node; the node carries KeysCount + 1 child records.
TRecIdx KeysCount;
// Size in bytes of the keys area that precedes the child records.
TPgSize KeysSize;
ui8 FixedKeySize;
// When set, child records are stored as TShortChild instead of TChild.
ui8 IsShortChildFormat : 1;
// Fixed key size; the value MaxFixedKeySize is reserved to mean "not fixed format".
ui8 FixedKeySize : 7;

// Largest value representable in the 7-bit FixedKeySize field, used as the sentinel above.
const static ui8 MaxFixedKeySize = 127;
} Y_PACKED;

static_assert(sizeof(THeader) == 9, "Invalid TBtreeIndexNode THeader size");
Expand All @@ -80,23 +83,36 @@ namespace NKikimr::NTable::NPage {

static_assert(sizeof(TIsNullBitmap) == 1, "Invalid TBtreeIndexNode TIsNullBitmap size");

// Compact child record used when the node header has IsShortChildFormat set.
// It carries the same leading fields as TChild but no erased-rows counter.
struct TShortChild {
// Page id of the child page.
TPageId PageId;
// Row count bound of the child; the node search compares a row id against it (rowId < Count).
TRowId Count;
// Data size value associated with the child.
ui64 DataSize;

// Member-wise comparison in declaration order.
auto operator<=>(const TShortChild&) const = default;
} Y_PACKED;

// Pins the packed on-page size of a short child record.
static_assert(sizeof(TShortChild) == 20, "Invalid TBtreeIndexNode TShortChild size");

// Full child record: the TShortChild fields plus an erased-rows counter.
// NOTE(review): this is an unmarked diff rendering, so ErasedCount and the return
// statement each appear twice (pre- and post-change). The surrounding offsetof
// static_asserts require DataSize to sit at the same offset as in TShortChild,
// which matches the order PageId, Count, DataSize, ErasedCount.
struct TChild {
TPageId PageId;
TRowId Count;
TRowId ErasedCount;
ui64 DataSize;
TRowId ErasedCount;

// Member-wise comparison in declaration order.
auto operator<=>(const TChild&) const = default;

// Renders the record as a human-readable string.
TString ToString() const noexcept
{
// copy values to prevent 'reference binding to misaligned address' error
// NOTE(review): the second return below streams the packed fields directly, without
// the explicit copies the comment above describes - confirm that the stream operator
// takes these by value, or keep the copies.
return TStringBuilder() << "PageId: " << TPageId(PageId) << " Count: " << TRowId(Count) << " Erased: " << TRowId(ErasedCount) << " DataSize: " << ui64(DataSize);
return TStringBuilder() << "PageId: " << PageId << " Count: " << Count << " DataSize: " << DataSize << " Erased: " << ErasedCount;
}
} Y_PACKED;

static_assert(sizeof(TChild) == 28, "Invalid TBtreeIndexNode TChild size");

static_assert(offsetof(TChild, PageId) == offsetof(TShortChild, PageId));
static_assert(offsetof(TChild, Count) == offsetof(TShortChild, Count));
static_assert(offsetof(TChild, DataSize) == offsetof(TShortChild, DataSize));

#pragma pack(pop)

struct TCellsIter {
Expand Down Expand Up @@ -242,7 +258,7 @@ namespace NKikimr::NTable::NPage {
offset += Header->KeysSize;

Children = TDeref<const TChild>::At(Header, offset);
offset += (1 + Header->KeysCount) * sizeof(TChild);
offset += (1 + Header->KeysCount) * (Header->IsShortChildFormat ? sizeof(TShortChild) : sizeof(TChild));

Y_ABORT_UNLESS(offset == data.Page.size());
}
Expand All @@ -254,7 +270,7 @@ namespace NKikimr::NTable::NPage {

// Returns true unless the header's FixedKeySize holds the reserved "not fixed" sentinel.
// NOTE(review): unmarked diff rendering - the Max<ui8>() comparison appears to be the
// pre-change line and the MaxFixedKeySize comparison its replacement.
bool IsFixedFormat() const noexcept
{
return Header->FixedKeySize != Max<ui8>();
return Header->FixedKeySize != Header->MaxFixedKeySize;
}

TRecIdx GetKeysCount() const noexcept
Expand All @@ -277,9 +293,23 @@ namespace NKikimr::NTable::NPage {
return GetCells<TCellsIter>(pos, columns);
}

const TChild& GetChild(TRecIdx pos) const noexcept
// Returns a reference to the PageId/Count/DataSize prefix of the child record at `pos`,
// regardless of which child format the node uses.
// NOTE(review): unmarked diff rendering - `return Children[pos];` appears to be the
// body of the removed accessor, not part of this function.
const TShortChild& GetShortChild(TRecIdx pos) const noexcept
{
return Children[pos];
if (Header->IsShortChildFormat) {
// Records are TShortChild-sized, so step by sizeof(TShortChild).
return *TDeref<const TShortChild>::At(Children, pos * sizeof(TShortChild));
} else {
// Records are TChild-sized; only the leading fields are viewed, which relies on
// TChild and TShortChild sharing the offsets of PageId, Count and DataSize.
return *TDeref<const TShortChild>::At(Children, pos * sizeof(TChild));
}
}

// Returns the child record at `pos` by value as a full TChild.
// For short-format nodes the record is widened and ErasedCount is reported as 0.
TChild GetChild(TRecIdx pos) const noexcept
{
if (Header->IsShortChildFormat) {
const TShortChild* const shortChild = TDeref<const TShortChild>::At(Children, pos * sizeof(TShortChild));
// Short records carry no erased counter, so the last field is filled with zero.
return { shortChild->PageId, shortChild->Count, shortChild->DataSize, 0 };
} else {
return *TDeref<const TChild>::At(Children, pos * sizeof(TChild));
}
}

static bool Has(TRowId rowId, TRowId beginRowId, TRowId endRowId) noexcept {
Expand All @@ -294,44 +324,46 @@ namespace NKikimr::NTable::NPage {
on = { };
}

const auto cmp = [](TRowId rowId, const TChild& child) {
return rowId < child.Count;
auto range = xrange(0u, childrenCount);
const auto cmp = [this](TRowId rowId, TPos pos) {
return rowId < GetShortChild(pos).Count;
};

TRecIdx result;
if (!on) {
// Use a full binary search
result = std::upper_bound(Children, Children + childrenCount, rowId, cmp) - Children;
} else if (Children[*on].Count <= rowId) {
// Will do a full binary search on full range
} else if (GetShortChild(*on).Count <= rowId) {
// Try a short linear search first
result = *on;
for (int linear = 0; linear < 4; ++linear) {
result++;
Y_ABORT_UNLESS(result < childrenCount, "Should always seek some child");
if (Children[result].Count > rowId) {
if (GetShortChild(result).Count > rowId) {
return result;
}
}

// Binary search from the next record
result = std::upper_bound(Children + result + 1, Children + childrenCount, rowId, cmp) - Children;
// Will do a binary search from the next record
range = xrange(result + 1, childrenCount);
} else { // Children[*on].Count > rowId
// Try a short linear search first
result = *on;
for (int linear = 0; linear < 4; ++linear) {
if (result == 0) {
return 0;
}
if (Children[result - 1].Count <= rowId) {
if (GetShortChild(result - 1).Count <= rowId) {
return result;
}
result--;
}

// Binary search up to current record
result = std::upper_bound(Children, Children + result, rowId, cmp) - Children;
// Will do a binary search up to current record
range = xrange(0u, result);
}

result = *std::upper_bound(range.begin(), range.end(), rowId, cmp);

Y_ABORT_UNLESS(result < childrenCount, "Should always seek some child");
return result;
}
Expand Down Expand Up @@ -428,7 +460,7 @@ namespace NKikimr::NTable::NPage {
const THeader* Header = nullptr;
const void* Keys = nullptr;
const TRecordsEntry* Offsets = nullptr;
const TChild* Children = nullptr;
const void* Children = nullptr;
};

struct TBtreeIndexMeta : public TBtreeIndexNode::TChild {
Expand Down
47 changes: 34 additions & 13 deletions ydb/core/tablet_flat/flat_page_btree_index_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ namespace NKikimr::NTable::NPage {
class TBtreeIndexNodeWriter {
using THeader = TBtreeIndexNode::THeader;
using TIsNullBitmap = TBtreeIndexNode::TIsNullBitmap;
using TShortChild = TBtreeIndexNode::TShortChild;
using TChild = TBtreeIndexNode::TChild;

public:
Expand All @@ -17,20 +18,26 @@ namespace NKikimr::NTable::NPage {
, GroupInfo(Scheme->GetLayout(groupId))
{
if (GroupId.IsMain()) {
FixedKeySize = Max<ui8>();
// TODO: some main groups without nulls and var-sized cells also may use fixed format
FixedKeySize = TBtreeIndexNode::THeader::MaxFixedKeySize;
} else {
FixedKeySize = 0;
for (TPos pos : xrange(GroupInfo.KeyTypes.size())) {
Y_ABORT_UNLESS(GroupInfo.ColsKeyIdx[pos].IsFixed);
FixedKeySize += GroupInfo.ColsKeyIdx[pos].FixedSize;
}
Y_ABORT_UNLESS(FixedKeySize < Max<ui8>(), "KeysSize is out of bounds");
Y_ABORT_UNLESS(FixedKeySize < TBtreeIndexNode::THeader::MaxFixedKeySize, "FixedKeySize is out of bounds");
}
}

// Returns true unless the writer's FixedKeySize equals the reserved "not fixed" sentinel.
// NOTE(review): unmarked diff rendering - the Max<ui8>() comparison appears to be the
// pre-change line and the MaxFixedKeySize comparison its replacement.
bool IsFixedFormat() const noexcept
{
return FixedKeySize != Max<ui8>();
return FixedKeySize != TBtreeIndexNode::THeader::MaxFixedKeySize;
}

// Returns true when this writer emits TShortChild records, i.e. for every group
// except the main one.
bool IsShortChildFormat() const noexcept
{
return !GroupId.IsMain();
}

void AddKey(TCellsRef cells) {
Expand All @@ -43,6 +50,7 @@ namespace NKikimr::NTable::NPage {
}

// Queues a child record for the node being built.
// Aborts if a non-zero ErasedCount is supplied while the short child format is in use,
// because that format has no field to store it.
void AddChild(TChild child) {
Y_ABORT_UNLESS(child.ErasedCount == 0 || !IsShortChildFormat(), "Short format can't have ErasedCount");
Children.push_back(child);
}

Expand Down Expand Up @@ -95,6 +103,7 @@ namespace NKikimr::NTable::NPage {
header.KeysCount = Keys.size();
Y_ABORT_UNLESS(KeysSize < Max<TPgSize>(), "KeysSize is out of bounds");
header.KeysSize = KeysSize;
header.IsShortChildFormat = IsShortChildFormat();
header.FixedKeySize = FixedKeySize;

if (!IsFixedFormat()) {
Expand All @@ -118,7 +127,10 @@ namespace NKikimr::NTable::NPage {
Keys.clear();
KeysSize = 0;

PlaceVector(Children);
for (auto &child : Children) {
PlaceChild(child);
}
Children.clear();

Y_ABORT_UNLESS(Ptr == End);
NSan::CheckMemIsInitialized(buf.data(), buf.size());
Expand All @@ -137,15 +149,18 @@ namespace NKikimr::NTable::NPage {
sizeof(TLabel) + sizeof(THeader) +
(IsFixedFormat() ? 0 : sizeof(TRecordsEntry) * keysCount) +
keysSize +
sizeof(TChild) * (keysCount + 1);
(IsShortChildFormat() ? sizeof(TShortChild) : sizeof(TChild)) * (keysCount + 1);
}

// Returns the number of keys currently held in Keys.
size_t GetKeysCount() const {
return Keys.size();
}

// Returns the sum of one TRecordsEntry, the key size from CalcKeySize(cells), and
// one child record in the format this writer uses.
// NOTE(review): unmarked diff rendering - the single-line return appears to be the
// pre-change statement and the multi-line return its replacement.
TPgSize CalcKeySizeWithMeta(TCellsRef cells) const noexcept {
return sizeof(TRecordsEntry) + CalcKeySize(cells) + sizeof(TChild);
return
sizeof(TRecordsEntry) +
CalcKeySize(cells) +
// Child record size depends on the format selected for this group.
(IsShortChildFormat() ? sizeof(TShortChild) : sizeof(TChild));
}

private:
Expand Down Expand Up @@ -231,12 +246,13 @@ namespace NKikimr::NTable::NPage {
std::copy(data.data(), data.data() + data.size(), Advance(data.size()));
}

// NOTE(review): unmarked diff rendering - the PlaceVector template header and the
// three statements using `vector` appear to be the removed helper; PlaceChild and the
// if/else below appear to be its replacement.
template<typename T>
void PlaceVector(TVector<T> &vector) noexcept
// Writes one child record, narrowing it to TShortChild (dropping ErasedCount) when the
// short child format is in use, and writing the full TChild otherwise.
void PlaceChild(const TChild& child) noexcept
{
auto *dst = reinterpret_cast<T*>(Advance(sizeof(T)*vector.size()));
std::copy(vector.begin(), vector.end(), dst);
vector.clear();
if (IsShortChildFormat()) {
// presumably Place<T>() yields a reference to the next sizeof(T) bytes of output - TODO confirm
Place<TShortChild>() = TShortChild{child.PageId, child.Count, child.DataSize};
} else {
Place<TChild>() = child;
}
}

template<typename T>
Expand Down Expand Up @@ -277,6 +293,7 @@ namespace NKikimr::NTable::NPage {

class TBtreeIndexBuilder {
public:
using TShortChild = TBtreeIndexNode::TShortChild;
using TChild = TBtreeIndexNode::TChild;

private:
Expand Down Expand Up @@ -357,11 +374,15 @@ namespace NKikimr::NTable::NPage {
Levels[0].PushKey(Writer.SerializeKey(cells));
}

// Convenience overload for children without an erased counter: widens the record to a
// TChild with ErasedCount set to 0 and forwards it to AddChild.
void AddShortChild(TShortChild child) {
AddChild(TChild{child.PageId, child.Count, child.DataSize, 0});
}

// Adds a child to the lowest level after replacing its Count, DataSize and ErasedCount
// with the running totals over all children added so far.
// NOTE(review): unmarked diff rendering - the ErasedCount accumulation is shown twice
// (before and after the DataSize line); the commit appears to move it, not duplicate
// it. Running both would add the erased count twice.
void AddChild(TChild child) {
// aggregate in order to perform search by row id from any leaf node
child.Count = (ChildrenCount += child.Count);
child.ErasedCount = (ChildrenErasedCount += child.ErasedCount);
child.DataSize = (ChildrenSize += child.DataSize);
child.ErasedCount = (ChildrenErasedCount += child.ErasedCount);

Levels[0].PushChild(child);
}
Expand Down Expand Up @@ -441,7 +462,7 @@ namespace NKikimr::NTable::NPage {
if (levelIndex + 1 == Levels.size()) {
Levels.emplace_back();
}
Levels[levelIndex + 1].PushChild(TChild{pageId, lastChild.Count, lastChild.ErasedCount, lastChild.DataSize});
Levels[levelIndex + 1].PushChild(TChild{pageId, lastChild.Count, lastChild.DataSize, lastChild.ErasedCount});
if (!last) {
Levels[levelIndex + 1].PushKey(Levels[levelIndex].PopKey());
}
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/tablet_flat/flat_part_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ void TLoader::StageParseMeta() noexcept
NPage::TBtreeIndexMeta converted{{
meta.GetRootPageId(),
meta.GetCount(),
meta.GetErasedCount(),
meta.GetDataSize()},
meta.GetDataSize(),
meta.GetErasedCount()},
meta.GetLevelsCount(),
meta.GetIndexSize()};
(history ? BTreeHistoricIndexes : BTreeGroupIndexes).push_back(converted);
Expand Down
5 changes: 2 additions & 3 deletions ydb/core/tablet_flat/flat_part_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -796,11 +796,10 @@ namespace NTable {
g.BTreeIndex.AddKey(Key);
}
if (groupId.IsMain()) {
g.BTreeIndex.AddChild({page, dataPage->Count, Current.BTreeIndexErased, raw.size()});
g.BTreeIndex.AddChild({page, dataPage->Count, raw.size(), Current.BTreeIndexErased});
Current.BTreeIndexErased = 0;
} else {
// TODO: don't write erased for non-main groups
g.BTreeIndex.AddChild({page, dataPage->Count, 0, raw.size()});
g.BTreeIndex.AddShortChild({page, dataPage->Count, raw.size()});
}
g.BTreeIndex.Flush(Pager, false);
}
Expand Down
4 changes: 2 additions & 2 deletions ydb/core/tablet_flat/test/libs/table/test_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ namespace NTest {
NPage::TBtreeIndexMeta converted{{
meta.GetRootPageId(),
meta.GetCount(),
meta.GetErasedCount(),
meta.GetDataSize()},
meta.GetDataSize(),
meta.GetErasedCount()},
meta.GetLevelsCount(),
meta.GetIndexSize()};
(history ? BTreeHistoricIndexes : BTreeGroupIndexes).push_back(converted);
Expand Down
Loading

0 comments on commit ebe7a9b

Please sign in to comment.