From 8db9ddc0443fe08dda568c02070a1553415aef3c Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 10:57:13 +0300 Subject: [PATCH 01/31] direct records usage clean --- .../tx/columnshard/counters/engine_logs.cpp | 30 +++------------- .../tx/columnshard/engines/column_engine.cpp | 4 +-- .../tx/columnshard/engines/column_engine.h | 20 ----------- .../engines/column_engine_logs.cpp | 11 ------ .../tx/columnshard/engines/portion_info.cpp | 5 --- .../tx/columnshard/engines/portion_info.h | 7 ---- .../engines/portions/column_record.h | 4 --- .../engines/portions/portion_info.h | 4 +-- .../engines/portions/read_with_blobs.cpp | 2 +- .../engines/reader/sys_view/chunks/chunks.cpp | 2 +- .../engines/storage/granule/granule.cpp | 3 -- .../engines/storage/granule/granule.h | 34 ------------------- .../engines/storage/granule/storage.cpp | 9 +++-- ydb/core/tx/columnshard/engines/ya.make | 1 - 14 files changed, 15 insertions(+), 121 deletions(-) delete mode 100644 ydb/core/tx/columnshard/engines/portion_info.cpp delete mode 100644 ydb/core/tx/columnshard/engines/portion_info.h diff --git a/ydb/core/tx/columnshard/counters/engine_logs.cpp b/ydb/core/tx/columnshard/counters/engine_logs.cpp index 3285db3f7d8c..2837a2cd2547 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.cpp +++ b/ydb/core/tx/columnshard/counters/engine_logs.cpp @@ -87,19 +87,10 @@ void TEngineLogsCounters::TPortionsInfoGuard::OnNewPortion(const std::shared_ptr const ui32 producedId = (ui32)(portion->HasRemoveSnapshot() ? NOlap::NPortion::EProduced::INACTIVE : portion->GetMeta().Produced); Y_ABORT_UNLESS(producedId < BlobGuards.size()); THashSet blobIds; - for (auto&& i : portion->GetRecords()) { + for (auto&& blobId : portion->GetBlobIds()) { const auto blobId = portion->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); - } - } - for (auto&& i : portion->GetIndexes()) { - if (i.HasBlobRange()) { - const auto blobId = portion->GetBlobId(i.GetBlobRangeVerified().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); - } - } + AFL_VERIFY(blobIds.emplace(blobId).second) + BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); } PortionRecordCountGuards[producedId]->Add(portion->GetRecordsCount(), 1); PortionSizeGuards[producedId]->Add(portion->GetTotalBlobBytes(), 1); @@ -109,19 +100,8 @@ void TEngineLogsCounters::TPortionsInfoGuard::OnDropPortion(const std::shared_pt const ui32 producedId = (ui32)(portion->HasRemoveSnapshot() ? NOlap::NPortion::EProduced::INACTIVE : portion->GetMeta().Produced); Y_ABORT_UNLESS(producedId < BlobGuards.size()); THashSet blobIds; - for (auto&& i : portion->GetRecords()) { - const auto blobId = portion->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Sub(blobId.BlobSize(), blobId.BlobSize()); - } - } - for (auto&& i : portion->GetIndexes()) { - if (i.HasBlobRange()) { - const auto blobId = portion->GetBlobId(i.GetBlobRangeVerified().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Sub(blobId.BlobSize(), blobId.BlobSize()); - } - } + for (auto&& blobId : portion->GetBlobIds()) { + BlobGuards[producedId]->Sub(blobId.BlobSize(), blobId.BlobSize()); } PortionRecordCountGuards[producedId]->Sub(portion->GetRecordsCount(), 1); PortionSizeGuards[producedId]->Sub(portion->GetTotalBlobBytes(), 1); diff --git a/ydb/core/tx/columnshard/engines/column_engine.cpp b/ydb/core/tx/columnshard/engines/column_engine.cpp index 0771ecaeec1d..334d77921907 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine.cpp @@ -41,8 +41,8 @@ TSelectInfo::TStats TSelectInfo::Stats() const { for (auto& portionInfo : PortionsOrderedPK) { out.Records += portionInfo->NumChunks(); out.Rows += portionInfo->NumRows(); - for (auto& rec : portionInfo->Records) { - out.Bytes += rec.BlobRange.Size; + for (auto& blobId : portionInfo->GetBlobIds()) { + out.Bytes += blobId.BlobSize(); } out.Blobs += portionInfo->GetBlobIdsCount(); } diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index a7830fdfd5a7..816fa2f9f8de 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -75,8 +75,6 @@ class TColumnEngineStats { i64 Rows = 0; i64 Bytes = 0; i64 RawBytes = 0; - THashMap BytesByColumn; - THashMap RawBytesByColumn; TString DebugString() const { return TStringBuilder() << "portions=" << Portions << ";blobs=" << Blobs << ";rows=" << Rows << ";bytes=" << Bytes << ";raw_bytes=" << RawBytes << ";"; @@ -94,14 +92,6 @@ class TColumnEngineStats { result.Rows = kff * Rows; result.Bytes = kff * Bytes; result.RawBytes = kff * RawBytes; - - for (auto&& i : BytesByColumn) { - result.BytesByColumn[i.first] = kff * i.second; - } - - for (auto&& i : RawBytesByColumn) { - result.RawBytesByColumn[i.first] = kff * i.second; - } return result; } @@ -115,21 +105,11 @@ class TColumnEngineStats { Rows = SumVerifiedPositive(Rows, item.Rows); Bytes = SumVerifiedPositive(Bytes, item.Bytes); RawBytes = SumVerifiedPositive(RawBytes, item.RawBytes); - for (auto&& i : item.BytesByColumn) { - auto& v = BytesByColumn[i.first]; - v = SumVerifiedPositive(v, i.second); - } - - for (auto&& i : item.RawBytesByColumn) { - auto& v = RawBytesByColumn[i.first]; - v = SumVerifiedPositive(v, i.second); - } return *this; } }; i64 Tables{}; - i64 ColumnRecords{}; THashMap StatsByType; std::vector GetKinds() const { diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 80ddb806292c..01ef43308854 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -81,10 +81,6 @@ void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, E TColumnEngineStats::TPortionsStats DeltaStats(const TPortionInfo& portionInfo) { TColumnEngineStats::TPortionsStats deltaStats; deltaStats.Bytes = 0; - for (auto& rec : portionInfo.Records) { - deltaStats.BytesByColumn[rec.ColumnId] += rec.BlobRange.Size; - deltaStats.RawBytesByColumn[rec.ColumnId] += rec.GetMeta().GetRawBytes(); - } deltaStats.Rows = portionInfo.NumRows(); deltaStats.Bytes = portionInfo.GetTotalBlobBytes(); deltaStats.RawBytes = portionInfo.GetTotalRawBytes(); @@ -96,7 +92,6 @@ TColumnEngineStats::TPortionsStats DeltaStats(const TPortionInfo& portionInfo) { void TColumnEngineForLogs::UpdatePortionStats(TColumnEngineStats& engineStats, const TPortionInfo& portionInfo, EStatsUpdateType updateType, const TPortionInfo* exPortionInfo) const { - ui64 columnRecords = portionInfo.Records.size(); TColumnEngineStats::TPortionsStats deltaStats = DeltaStats(portionInfo); Y_ABORT_UNLESS(!exPortionInfo || exPortionInfo->GetMeta().Produced != TPortionMeta::EProduced::UNSPECIFIED); @@ -115,20 +110,14 @@ void TColumnEngineForLogs::UpdatePortionStats(TColumnEngineStats& engineStats, c const bool isAdd = updateType == EStatsUpdateType::ADD; if (isErase) { // PortionsToDrop - engineStats.ColumnRecords -= columnRecords; - stats -= deltaStats; } else if (isAdd) { // Load || AppendedPortions - engineStats.ColumnRecords += columnRecords; - stats += deltaStats; } else if (&srcStats != &stats || exPortionInfo) { // SwitchedPortions || PortionsToEvict stats += deltaStats; if (exPortionInfo) { srcStats -= DeltaStats(*exPortionInfo); - - engineStats.ColumnRecords += columnRecords - exPortionInfo->Records.size(); } else { srcStats -= deltaStats; } diff --git a/ydb/core/tx/columnshard/engines/portion_info.cpp b/ydb/core/tx/columnshard/engines/portion_info.cpp deleted file mode 100644 index 9b11963e99be..000000000000 --- a/ydb/core/tx/columnshard/engines/portion_info.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "portion_info.h" - -namespace NKikimr::NOlap { - -} diff --git a/ydb/core/tx/columnshard/engines/portion_info.h b/ydb/core/tx/columnshard/engines/portion_info.h deleted file mode 100644 index 673e4f6c0b16..000000000000 --- a/ydb/core/tx/columnshard/engines/portion_info.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include "portions/portion_info.h" - -namespace NKikimr::NOlap { - -} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.h b/ydb/core/tx/columnshard/engines/portions/column_record.h index fd2efc97e9b9..5ddc7990b726 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.h +++ b/ydb/core/tx/columnshard/engines/portions/column_record.h @@ -153,10 +153,6 @@ class TColumnRecord { return ColumnId == item.ColumnId && Chunk == item.Chunk; } - bool Valid() const { - return ColumnId && BlobRange.IsValid(); - } - TString DebugString() const { return TStringBuilder() << "column_id:" << ColumnId << ";" << "chunk_idx:" << Chunk << ";" diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 02b0fdd7ec63..335092b9e95b 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -132,6 +132,8 @@ class TPortionInfo { } } } + std::vector Records; + public: ui32 GetCompactionLevel() const { return GetMeta().GetCompactionLevel(); @@ -299,8 +301,6 @@ class TPortionInfo { std::vector BuildPages() const; - std::vector Records; - const std::vector& GetRecords() const { return Records; } diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index 49be899b7e8d..67b94cb1ba45 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -16,7 +16,7 @@ void TReadPortionInfoWithBlobs::RestoreChunk(const std::shared_ptr> TReadPortionInfoWithBlobs::RestoreBatch( const ISnapshotSchema& data, const ISnapshotSchema& resultSchema, const std::set& seqColumns) const { THashMap blobs; - for (auto&& i : PortionInfo.Records) { + for (auto&& i : PortionInfo.GetRecords()) { blobs[i.GetAddress()] = GetBlobByAddressVerified(i.ColumnId, i.Chunk); Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp index 344d6f370493..3feda5330345 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -21,7 +21,7 @@ void TStatsIterator::AppendStats(const std::vector records; - for (auto&& r : portion.Records) { + for (auto&& r : portion.GetRecords()) { records.emplace_back(&r); } if (Reverse) { diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp index 2580264831f9..70ca2514eddf 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp @@ -15,9 +15,6 @@ void TGranuleMeta::UpsertPortion(const TPortionInfo& info) { AFL_VERIFY(info.GetPathId() == GetPathId())("event", "incompatible_granule")("portion", info.DebugString())("path_id", GetPathId()); AFL_VERIFY(info.ValidSnapshotInfo())("event", "incorrect_portion_snapshots")("portion", info.DebugString()); - for (auto& record : info.Records) { - AFL_VERIFY(record.Valid())("event", "incorrect_record")("record", record.DebugString())("portion", info.DebugString()); - } if (it == Portions.end()) { OnBeforeChangePortion(nullptr); diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.h b/ydb/core/tx/columnshard/engines/storage/granule/granule.h index ad0f50b0336b..5d75e8401b98 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.h @@ -21,28 +21,14 @@ class TColumnChunkLoadContext; class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { private: friend class TGranuleMeta; - THashMap ColumnStats; public: - const THashMap& GetColumnStats() const { - return ColumnStats; - } - void AddPortion(const TPortionInfo& info) { ColumnPortionsSize += info.GetColumnBlobBytes(); TotalPortionsSize += info.GetTotalBlobBytes(); MetadataMemoryPortionsSize += info.GetMetadataMemorySize(); RecordsCount += info.NumRows(); ++PortionsCount; - - for (auto&& c : info.Records) { - auto it = ColumnStats.find(c.ColumnId); - if (it == ColumnStats.end()) { - it = ColumnStats.emplace(c.ColumnId, c.GetSerializationStat()).first; - } else { - it->second.AddStat(c.GetSerializationStat()); - } - } } void RemovePortion(const TPortionInfo& info) { @@ -56,15 +42,6 @@ class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { Y_ABORT_UNLESS(RecordsCount >= 0); --PortionsCount; Y_ABORT_UNLESS(PortionsCount >= 0); - - for (auto&& c : info.Records) { - auto it = ColumnStats.find(c.ColumnId); - if (it == ColumnStats.end()) { - it = ColumnStats.emplace(c.ColumnId, c.GetSerializationStat()).first; - } else { - it->second.RemoveStat(c.GetSerializationStat()); - } - } } }; @@ -297,17 +274,6 @@ class TGranuleMeta: TNonCopyable { } } - std::shared_ptr BuildSerializationStats(ISnapshotSchema::TPtr schema) const { - auto result = std::make_shared(); - for (auto&& i : GetAdditiveSummary().GetCompacted().GetColumnStats()) { - auto field = schema->GetFieldByColumnIdVerified(i.first); - NArrow::NSplitter::TColumnSerializationStat columnInfo(i.first, field->name()); - columnInfo.Merge(i.second); - result->AddStat(columnInfo); - } - return result; - } - const TGranuleAdditiveSummary& GetAdditiveSummary() const; NStorageOptimizer::TOptimizationPriority GetCompactionPriority() const { diff --git a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp index 32b72c5ee9f8..a8c9a092bc32 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp @@ -17,9 +17,7 @@ class TGranuleOrdered { TGranuleOrdered(const NStorageOptimizer::TOptimizationPriority& priority, const std::shared_ptr& meta) : Priority(priority) - , Granule(meta) - { - + , Granule(meta) { } bool operator<(const TGranuleOrdered& item) const { @@ -29,8 +27,8 @@ class TGranuleOrdered { } // namespace std::optional TGranulesStorage::GetCompactionPriority( - const std::shared_ptr& dataLocksManager, const std::set& pathIds, - const std::optional waitingPriority, std::shared_ptr* granuleResult) const { + const std::shared_ptr& dataLocksManager, const std::set& pathIds, const std::optional waitingPriority, + std::shared_ptr* granuleResult) const { const TInstant now = HasAppData() ? AppDataVerified().TimeProvider->Now() : TInstant::Now(); std::vector granulesSorted; std::optional priorityChecker; @@ -65,6 +63,7 @@ std::optional TGranulesStorage::GetCom maxPriorityGranule = granulesSorted.front().GetGranule(); break; } + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "granule_locked")("path_id", granulesSorted.front().GetGranule()->GetPathId()); std::pop_heap(granulesSorted.begin(), granulesSorted.end()); granulesSorted.pop_back(); } diff --git a/ydb/core/tx/columnshard/engines/ya.make b/ydb/core/tx/columnshard/engines/ya.make index d49a325a7832..00096a94e82e 100644 --- a/ydb/core/tx/columnshard/engines/ya.make +++ b/ydb/core/tx/columnshard/engines/ya.make @@ -11,7 +11,6 @@ SRCS( db_wrapper.cpp index_info.cpp filter.cpp - portion_info.cpp tier_info.cpp defs.cpp ) From a5c4ac317c0e1783f63403e056c4123680bb7fb0 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 10:58:40 +0300 Subject: [PATCH 02/31] fix --- ydb/core/tx/columnshard/engines/portions/portion_info.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 335092b9e95b..b24164c3a9b5 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -135,6 +135,10 @@ class TPortionInfo { std::vector Records; public: + const std::vector& GetBlobIds() const { + return BlobIds; + } + ui32 GetCompactionLevel() const { return GetMeta().GetCompactionLevel(); } From bae3934a73a4119b43418406051ccd346bccac93 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 11:00:07 +0300 Subject: [PATCH 03/31] fix --- ydb/core/tx/columnshard/engines/predicate/range.h | 6 +++--- .../columnshard/engines/writer/indexed_blob_constructor.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/predicate/range.h b/ydb/core/tx/columnshard/engines/predicate/range.h index 6f9f264b7d70..705fda77d451 100644 --- a/ydb/core/tx/columnshard/engines/predicate/range.h +++ b/ydb/core/tx/columnshard/engines/predicate/range.h @@ -1,7 +1,8 @@ #pragma once #include "container.h" -#include + #include +#include namespace NKikimr::NOlap { @@ -15,7 +16,6 @@ class TPKRangeFilter { } public: - bool IsEmpty() const { return PredicateFrom.IsEmpty() && PredicateTo.IsEmpty(); } @@ -48,4 +48,4 @@ class TPKRangeFilter { std::set GetColumnNames() const; }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h index dd993d314215..7dba87d3bb0b 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include namespace NKikimr::NColumnShard { From b83a8904f8e9d7b81f879d8f2a3802a843089c8a Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 11:04:35 +0300 Subject: [PATCH 04/31] fix --- ydb/core/tx/columnshard/counters/engine_logs.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/ydb/core/tx/columnshard/counters/engine_logs.cpp b/ydb/core/tx/columnshard/counters/engine_logs.cpp index 2837a2cd2547..b1c5ae1fd33d 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.cpp +++ b/ydb/core/tx/columnshard/counters/engine_logs.cpp @@ -86,10 +86,7 @@ void TEngineLogsCounters::OnActualizationTask(const ui32 evictCount, const ui32 void TEngineLogsCounters::TPortionsInfoGuard::OnNewPortion(const std::shared_ptr& portion) const { const ui32 producedId = (ui32)(portion->HasRemoveSnapshot() ? NOlap::NPortion::EProduced::INACTIVE : portion->GetMeta().Produced); Y_ABORT_UNLESS(producedId < BlobGuards.size()); - THashSet blobIds; for (auto&& blobId : portion->GetBlobIds()) { - const auto blobId = portion->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - AFL_VERIFY(blobIds.emplace(blobId).second) BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); } PortionRecordCountGuards[producedId]->Add(portion->GetRecordsCount(), 1); From 33c43c60bcad84398eef9505d83b5cf498d58b4e Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 11:17:55 +0300 Subject: [PATCH 05/31] fix --- .../tx/columnshard/engines/changes/with_appended.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp index 854c082155bd..0e0478327fd2 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp @@ -73,18 +73,14 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("portions", sb)("task_id", GetTaskIdentifier()); self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); - THashSet blobsDeactivated; for (auto& [_, portionInfo] : PortionsToRemove) { - for (auto& rec : portionInfo.Records) { - blobsDeactivated.emplace(portionInfo.GetBlobId(rec.BlobRange.GetBlobIdxVerified())); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, portionInfo.GetBlobIdsCount()); + for (auto& blobId : portionInfo.GetBlobIds()) { + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); } self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.GetTotalRawBytes()); } - self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); - for (auto& blobId : blobsDeactivated) { - self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); - } } if (PortionsToMove.size()) { THashMap portionGroups; From c8d4fba86ed91b913aabdf979bc475d9b8449b9d Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 11:48:55 +0300 Subject: [PATCH 06/31] fix --- ydb/core/tx/columnshard/columnshard.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp index 56794f1c0520..636327c597fc 100644 --- a/ydb/core/tx/columnshard/columnshard.cpp +++ b/ydb/core/tx/columnshard/columnshard.cpp @@ -270,7 +270,6 @@ void TColumnShard::UpdateIndexCounters() { auto& stats = TablesManager.MutablePrimaryIndex().GetTotalStats(); const std::shared_ptr& counters = Counters.GetTabletCounters(); counters->SetCounter(COUNTER_INDEX_TABLES, stats.Tables); - counters->SetCounter(COUNTER_INDEX_COLUMN_RECORDS, stats.ColumnRecords); counters->SetCounter(COUNTER_INSERTED_PORTIONS, stats.GetInsertedStats().Portions); counters->SetCounter(COUNTER_INSERTED_BLOBS, stats.GetInsertedStats().Blobs); counters->SetCounter(COUNTER_INSERTED_ROWS, stats.GetInsertedStats().Rows); @@ -300,7 +299,7 @@ void TColumnShard::UpdateIndexCounters() { LOG_S_DEBUG("Index: tables " << stats.Tables << " inserted " << stats.GetInsertedStats().DebugString() << " compacted " << stats.GetCompactedStats().DebugString() << " s-compacted " << stats.GetSplitCompactedStats().DebugString() << " inactive " << stats.GetInactiveStats().DebugString() << " evicted " - << stats.GetEvictedStats().DebugString() << " column records " << stats.ColumnRecords << " at tablet " + << stats.GetEvictedStats().DebugString() << " at tablet " << TabletID()); } From 4d489bb1834616b5a8998ef4344eed2ffc1bfe02 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 12:22:23 +0300 Subject: [PATCH 07/31] PortionId --- .../engines/portions/portion_info.cpp | 8 ++++---- .../engines/portions/portion_info.h | 18 +++++++----------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index e450981e3db5..757c89195b32 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -65,7 +65,7 @@ ui64 TPortionInfo::GetIndexRawBytes(const bool validation) const { TString TPortionInfo::DebugString(const bool withDetails) const { TStringBuilder sb; - sb << "(portion_id:" << Portion << ";" << + sb << "(portion_id:" << PortionId << ";" << "path_id:" << PathId << ";records_count:" << NumRows() << ";" "min_schema_snapshot:(" << MinSnapshotDeprecated.DebugString() << ");" "schema_version:" << SchemaVersion.value_or(0) << ";" @@ -213,7 +213,7 @@ ui64 TPortionInfo::GetTxVolume() const { void TPortionInfo::SerializeToProto(NKikimrColumnShardDataSharingProto::TPortionInfo& proto) const { proto.SetPathId(PathId); - proto.SetPortionId(Portion); + proto.SetPortionId(PortionId); proto.SetSchemaVersion(GetSchemaVersionVerified()); *proto.MutableMinSnapshotDeprecated() = MinSnapshotDeprecated.SerializeToProto(); if (!RemoveSnapshot.IsZero()) { @@ -236,7 +236,7 @@ void TPortionInfo::SerializeToProto(NKikimrColumnShardDataSharingProto::TPortion TConclusionStatus TPortionInfo::DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TPortionInfo& proto) { PathId = proto.GetPathId(); - Portion = proto.GetPortionId(); + PortionId = proto.GetPortionId(); SchemaVersion = proto.GetSchemaVersion(); for (auto&& i : proto.GetBlobIds()) { auto blobId = TUnifiedBlobId::BuildFromProto(i); @@ -502,7 +502,7 @@ void TPortionInfo::FullValidation() const { CheckChunksOrder(Records); CheckChunksOrder(Indexes); AFL_VERIFY(PathId); - AFL_VERIFY(Portion); + AFL_VERIFY(PortionId); AFL_VERIFY(MinSnapshotDeprecated.Valid()); std::set blobIdxs; for (auto&& i : Records) { diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index b24164c3a9b5..7c408471ef40 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -78,7 +78,7 @@ class TPortionInfo { std::optional InsertWriteId; ui64 PathId = 0; - ui64 Portion = 0; // Id of independent (overlayed by PK) portion of data in pathId + ui64 PortionId = 0; // Id of independent (overlayed by PK) portion of data in pathId TSnapshot MinSnapshotDeprecated = TSnapshot::Zero(); // {PlanStep, TxId} is min snapshot for {Granule, Portion} TSnapshot RemoveSnapshot = TSnapshot::Zero(); // {XPlanStep, XTxId} is snapshot where the blob has been removed (i.e. compacted into another one) std::optional SchemaVersion; @@ -342,12 +342,12 @@ class TPortionInfo { } ui64 GetPortionId() const { - return Portion; + return PortionId; } NJson::TJsonValue SerializeToJsonVisual() const { NJson::TJsonValue result = NJson::JSON_MAP; - result.InsertValue("id", Portion); + result.InsertValue("id", PortionId); result.InsertValue("s_max", RecordSnapshotMax().GetPlanStep() / 1000); /* result.InsertValue("s_min", RecordSnapshotMin().GetPlanStep()); @@ -420,7 +420,7 @@ class TPortionInfo { return false; } - bool ValidSnapshotInfo() const { return MinSnapshotDeprecated.Valid() && PathId && Portion; } + bool ValidSnapshotInfo() const { return MinSnapshotDeprecated.Valid() && PathId && PortionId; } size_t NumChunks() const { return Records.size(); } TString DebugString(const bool withDetails = false) const; @@ -445,12 +445,8 @@ class TPortionInfo { return HasRemoveSnapshot(); } - ui64 GetPortion() const { - return Portion; - } - TPortionAddress GetAddress() const { - return TPortionAddress(PathId, Portion); + return TPortionAddress(PathId, PortionId); } void ResetShardingVersion() { @@ -461,8 +457,8 @@ class TPortionInfo { PathId = pathId; } - void SetPortion(const ui64 portion) { - Portion = portion; + void SetPortionId(const ui64 id) { + PortionId = id; } From f64b33b591b83a6a041535c3a9955e81ca70f3ad Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 12:25:13 +0300 Subject: [PATCH 08/31] fix --- ydb/core/tx/columnshard/engines/db_wrapper.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.cpp b/ydb/core/tx/columnshard/engines/db_wrapper.cpp index 2f3687563202..11ad657cd6b4 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.cpp +++ b/ydb/core/tx/columnshard/engines/db_wrapper.cpp @@ -52,7 +52,7 @@ void TDbWrapper::WriteColumn(const NOlap::TPortionInfo& portion, const TColumnRe using IndexColumns = NColumnShard::Schema::IndexColumns; auto removeSnapshot = portion.GetRemoveSnapshotOptional(); db.Table().Key(0, 0, row.ColumnId, - portion.GetMinSnapshotDeprecated().GetPlanStep(), portion.GetMinSnapshotDeprecated().GetTxId(), portion.GetPortion(), row.Chunk).Update( + portion.GetMinSnapshotDeprecated().GetPlanStep(), portion.GetMinSnapshotDeprecated().GetTxId(), portion.GetPortionId(), row.Chunk).Update( NIceDb::TUpdate(removeSnapshot ? removeSnapshot->GetPlanStep() : 0), NIceDb::TUpdate(removeSnapshot ? removeSnapshot->GetTxId() : 0), NIceDb::TUpdate(portion.GetBlobId(row.GetBlobRange().GetBlobIdxVerified()).SerializeBinary()), @@ -72,7 +72,7 @@ void TDbWrapper::WritePortion(const NOlap::TPortionInfo& portion) { const auto insertWriteId = portion.GetInsertWriteIdOptional(); const auto minSnapshotDeprecated = portion.GetMinSnapshotDeprecated(); db.Table() - .Key(portion.GetPathId(), portion.GetPortion()) + .Key(portion.GetPathId(), portion.GetPortionId()) .Update(NIceDb::TUpdate(portion.GetSchemaVersionVerified()), NIceDb::TUpdate(portion.GetShardingVersionDef(0)), NIceDb::TUpdate(commitSnapshot ? commitSnapshot->GetPlanStep() : 0), @@ -88,14 +88,14 @@ void TDbWrapper::WritePortion(const NOlap::TPortionInfo& portion) { void TDbWrapper::ErasePortion(const NOlap::TPortionInfo& portion) { NIceDb::TNiceDb db(Database); using IndexPortions = NColumnShard::Schema::IndexPortions; - db.Table().Key(portion.GetPathId(), portion.GetPortion()).Delete(); + db.Table().Key(portion.GetPathId(), portion.GetPortionId()).Delete(); } void TDbWrapper::EraseColumn(const NOlap::TPortionInfo& portion, const TColumnRecord& row) { NIceDb::TNiceDb db(Database); using IndexColumns = NColumnShard::Schema::IndexColumns; db.Table().Key(0, 0, row.ColumnId, - portion.GetMinSnapshotDeprecated().GetPlanStep(), portion.GetMinSnapshotDeprecated().GetTxId(), portion.GetPortion(), row.Chunk).Delete(); + portion.GetMinSnapshotDeprecated().GetPlanStep(), portion.GetMinSnapshotDeprecated().GetTxId(), portion.GetPortionId(), row.Chunk).Delete(); } bool TDbWrapper::LoadColumns(const std::function& callback) { From dc94eaace0a27deac36668f78c8103af417cca9d Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 12:27:31 +0300 Subject: [PATCH 09/31] fix --- ydb/core/tx/columnshard/engines/portions/constructor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.cpp b/ydb/core/tx/columnshard/engines/portions/constructor.cpp index 216628d89e4c..d2de998738a3 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/portions/constructor.cpp @@ -15,7 +15,7 @@ TPortionInfo TPortionInfoConstructor::Build(const bool needChunksNormalization) TPortionInfo result(MetaConstructor.Build()); AFL_VERIFY(PathId); result.PathId = PathId; - result.Portion = GetPortionIdVerified(); + result.PortionId = GetPortionIdVerified(); AFL_VERIFY(MinSnapshotDeprecated); AFL_VERIFY(MinSnapshotDeprecated->Valid()); From 8b92289f35a20926da87faee764f1f22a208acf3 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 12:33:30 +0300 Subject: [PATCH 10/31] fixes --- .../data_sharing/destination/events/transfer.h | 2 +- ydb/core/tx/columnshard/engines/column_engine_logs.cpp | 2 +- .../engines/storage/optimizer/ut/ut_optimizer.cpp | 4 ++-- ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp | 8 ++++---- .../tx/columnshard/ut_rw/ut_columnshard_read_write.cpp | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.h b/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.h index 715cee95fb15..16a992c6f2e3 100644 --- a/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.h +++ b/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.h @@ -55,7 +55,7 @@ class TPathIdData { void InitPortionIds(ui64* lastPortionId, const std::optional pathId = {}) { AFL_VERIFY(lastPortionId); for (auto&& i : Portions) { - i.SetPortion(++*lastPortionId); + i.SetPortionId(++*lastPortionId); if (pathId) { i.SetPathId(*pathId); } diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 01ef43308854..d3943c415f90 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -502,7 +502,7 @@ void TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, const } bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool updateStats) { - const ui64 portion = portionInfo.GetPortion(); + const ui64 portion = portionInfo.GetPortionId(); auto& spg = MutableGranuleVerified(portionInfo.GetPathId()); auto p = spg.GetPortionOptional(portion); diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp index 420a9e5901e9..a329234bf938 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp @@ -83,8 +83,8 @@ Y_UNIT_TEST_SUITE(StorageOptimizer) { auto task = dynamic_pointer_cast(planner.GetOptimizationTask(limits, nullptr)); Y_ABORT_UNLESS(task); Y_ABORT_UNLESS(task->SwitchedPortions.size() == 2); - Y_ABORT_UNLESS(task->SwitchedPortions[0].GetPortion() == 1); - Y_ABORT_UNLESS(task->SwitchedPortions[1].GetPortion() == 2); + Y_ABORT_UNLESS(task->SwitchedPortions[0].GetPortionId() == 1); + Y_ABORT_UNLESS(task->SwitchedPortions[1].GetPortionId() == 2); } }; diff --git a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp index 6dd7bad1e4a8..43543e0e03d1 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp @@ -110,11 +110,11 @@ class TTestDbWrapper : public IDbWrapper { if (!itInsertInfo.second) { itInsertInfo.first->second = loadContext; } - auto it = data.find(portion.GetPortion()); + auto it = data.find(portion.GetPortionId()); if (it == data.end()) { - it = data.emplace(portion.GetPortion(), TPortionInfoConstructor(portion, false, true)).first; + it = data.emplace(portion.GetPortionId(), TPortionInfoConstructor(portion, false, true)).first; } else { - Y_ABORT_UNLESS(portion.GetPathId() == it->second.GetPathId() && portion.GetPortion() == it->second.GetPortionIdVerified()); + Y_ABORT_UNLESS(portion.GetPathId() == it->second.GetPathId() && portion.GetPortionId() == it->second.GetPortionIdVerified()); } it->second.SetMinSnapshotDeprecated(portion.GetMinSnapshotDeprecated()); if (portion.HasRemoveSnapshot()) { @@ -140,7 +140,7 @@ class TTestDbWrapper : public IDbWrapper { void EraseColumn(const TPortionInfo& portion, const TColumnRecord& row) override { auto& data = Indices[0].Columns[portion.GetPathId()]; - auto it = data.find(portion.GetPortion()); + auto it = data.find(portion.GetPortionId()); Y_ABORT_UNLESS(it != data.end()); auto& portionLocal = it->second; diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index cb8349bb4e70..b853d40084ad 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -2539,8 +2539,8 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { const ui64 pathId = portionInfo.GetPathId(); UNIT_ASSERT(!srcPathId || srcPathId == pathId); srcPathId = pathId; - oldPortions.insert(portionInfo.GetPortion()); - sb << portionInfo.GetPortion() << ","; + oldPortions.insert(portionInfo.GetPortionId()); + sb << portionInfo.GetPortionId() << ","; } sb << Endl; Cerr << sb; @@ -2551,8 +2551,8 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { TStringBuilder sb; sb << "Cleanup old portions:"; for (const auto& portion : cleanup->PortionsToDrop) { - sb << " " << portion.GetPortion(); - deletedPortions.insert(portion.GetPortion()); + sb << " " << portion.GetPortionId(); + deletedPortions.insert(portion.GetPortionId()); } sb << Endl; Cerr << sb; From f054fa1f701f135de02b053fa622476323932113 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 12:35:25 +0300 Subject: [PATCH 11/31] fix --- ydb/core/tx/columnshard/engines/storage/granule/granule.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp index 70ca2514eddf..096d2b47bdc0 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp @@ -11,7 +11,7 @@ namespace NKikimr::NOlap { void TGranuleMeta::UpsertPortion(const TPortionInfo& info) { AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "upsert_portion")("portion", info.DebugString())("path_id", GetPathId()); - auto it = Portions.find(info.GetPortion()); + auto it = Portions.find(info.GetPortionId()); AFL_VERIFY(info.GetPathId() == GetPathId())("event", "incompatible_granule")("portion", info.DebugString())("path_id", GetPathId()); AFL_VERIFY(info.ValidSnapshotInfo())("event", "incorrect_portion_snapshots")("portion", info.DebugString()); @@ -19,7 +19,7 @@ void TGranuleMeta::UpsertPortion(const TPortionInfo& info) { if (it == Portions.end()) { OnBeforeChangePortion(nullptr); auto portionNew = std::make_shared(info); - it = Portions.emplace(portionNew->GetPortion(), portionNew).first; + it = Portions.emplace(portionNew->GetPortionId(), portionNew).first; } else { OnBeforeChangePortion(it->second); it->second = std::make_shared(info); From d31ae942f98688dac57e1914d022fb2c738ace27 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 12:36:55 +0300 Subject: [PATCH 12/31] fix --- ydb/core/tx/columnshard/engines/changes/with_appended.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp index 0e0478327fd2..089366d9704d 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp @@ -100,7 +100,7 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self for (auto& [_, portionInfo] : PortionsToRemove) { context.EngineLogs.AddCleanupPortion(portionInfo); const TPortionInfo& oldInfo = - context.EngineLogs.GetGranuleVerified(portionInfo.GetPathId()).GetPortionVerified(portionInfo.GetPortion()); + context.EngineLogs.GetGranuleVerified(portionInfo.GetPathId()).GetPortionVerified(portionInfo.GetPortionId()); context.EngineLogs.UpsertPortion(portionInfo, &oldInfo); } for (auto& portionBuilder : AppendedPortions) { From 8dcbeffec4042f7e56f1fc598f5d917cbb8aa428 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 13:17:11 +0300 Subject: [PATCH 13/31] split logic for portion data accessor --- .../engines/portions/data_accessor.cpp | 187 +++++++++++++ .../engines/portions/data_accessor.h | 222 +++++++++++++++ .../engines/portions/portion_info.cpp | 237 +++------------- .../engines/portions/portion_info.h | 263 +++--------------- .../engines/portions/read_with_blobs.cpp | 23 +- .../tx/columnshard/engines/portions/ya.make | 1 + .../plain_reader/iterator/fetched_data.h | 6 +- .../reader/plain_reader/iterator/source.cpp | 21 +- .../reader/plain_reader/iterator/source.h | 2 +- 9 files changed, 515 insertions(+), 447 deletions(-) create mode 100644 ydb/core/tx/columnshard/engines/portions/data_accessor.cpp create mode 100644 ydb/core/tx/columnshard/engines/portions/data_accessor.h diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp new file mode 100644 index 000000000000..a440b06efa48 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp @@ -0,0 +1,187 @@ +#include "data_accessor.h" + +#include +#include + +#include +#include +#include + +namespace NKikimr::NOlap { + +namespace { +template +TPortionDataAccessor::TPreparedBatchData PrepareForAssembleImpl(const TPortionDataAccessor& portionData, const TPortionInfo& portionInfo, + const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData, + const std::optional& defaultSnapshot) { + std::vector columns; + columns.reserve(resultSchema.GetColumnIds().size()); + const ui32 rowsCount = portionInfo.GetRecordsCount(); + for (auto&& i : resultSchema.GetColumnIds()) { + columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i), resultSchema.GetColumnLoaderVerified(i)); + if (portionInfo.HasInsertWriteId()) { + if (portionInfo.HasCommitSnapshot()) { + if (i == (ui32)IIndexInfo::ESpecialColumn::PLAN_STEP) { + columns.back().AddBlobInfo(0, portionInfo.GetRecordsCount(), + TPortionDataAccessor::TAssembleBlobInfo(portionInfo.GetRecordsCount(), + std::make_shared(portionInfo.GetCommitSnapshotVerified().GetPlanStep()), false)); + } + if (i == (ui32)IIndexInfo::ESpecialColumn::TX_ID) { + columns.back().AddBlobInfo(0, portionInfo.GetRecordsCount(), + TPortionDataAccessor::TAssembleBlobInfo(portionInfo.GetRecordsCount(), + std::make_shared(portionInfo.GetCommitSnapshotVerified().GetPlanStep()), false)); + } + } else { + if (i == (ui32)IIndexInfo::ESpecialColumn::PLAN_STEP) { + columns.back().AddBlobInfo(0, portionInfo.GetRecordsCount(), + TPortionDataAccessor::TAssembleBlobInfo(portionInfo.GetRecordsCount(), + std::make_shared(defaultSnapshot ? defaultSnapshot->GetPlanStep() : 0))); + } + if (i == (ui32)IIndexInfo::ESpecialColumn::TX_ID) { + columns.back().AddBlobInfo(0, portionInfo.GetRecordsCount(), + TPortionDataAccessor::TAssembleBlobInfo(portionInfo.GetRecordsCount(), + std::make_shared(defaultSnapshot ? defaultSnapshot->GetTxId() : 0))); + } + } + if (i == (ui32)IIndexInfo::ESpecialColumn::WRITE_ID) { + columns.back().AddBlobInfo(0, portionInfo.GetRecordsCount(), + TPortionDataAccessor::TAssembleBlobInfo(portionInfo.GetRecordsCount(), + std::make_shared((ui64)portionInfo.GetInsertWriteIdVerified()), false)); + } + if (i == (ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG) { + columns.back().AddBlobInfo(0, portionInfo.GetRecordsCount(), + TPortionDataAccessor::TAssembleBlobInfo(portionInfo.GetRecordsCount(), + std::make_shared((bool)portionInfo.GetMeta().GetDeletionsCount()), true)); + } + } + } + { + int skipColumnId = -1; + TPortionDataAccessor::TColumnAssemblingInfo* currentAssembler = nullptr; + for (auto& rec : portionData.GetRecords()) { + if (skipColumnId == (int)rec.ColumnId) { + continue; + } + if (!currentAssembler || rec.ColumnId != currentAssembler->GetColumnId()) { + const i32 resultPos = resultSchema.GetFieldIndex(rec.ColumnId); + if (resultPos < 0) { + skipColumnId = rec.ColumnId; + continue; + } + AFL_VERIFY((ui32)resultPos < columns.size()); + currentAssembler = &columns[resultPos]; + } + auto it = blobsData.find(rec.GetAddress()); + AFL_VERIFY(it != blobsData.end())("size", blobsData.size())("address", rec.GetAddress().DebugString()); + currentAssembler->AddBlobInfo(rec.Chunk, rec.GetMeta().GetNumRows(), std::move(it->second)); + blobsData.erase(it); + } + } + + // Make chunked arrays for columns + std::vector preparedColumns; + preparedColumns.reserve(columns.size()); + for (auto& c : columns) { + preparedColumns.emplace_back(c.Compile()); + } + + return TPortionDataAccessor::TPreparedBatchData(std::move(preparedColumns), rowsCount); +} + +} // namespace + +TPortionDataAccessor::TPreparedBatchData TPortionDataAccessor::PrepareForAssemble(const ISnapshotSchema& dataSchema, + const ISnapshotSchema& resultSchema, THashMap& blobsData, const std::optional& defaultSnapshot) const { + return PrepareForAssembleImpl(*this, *PortionInfo, dataSchema, resultSchema, blobsData, defaultSnapshot); +} + +TPortionDataAccessor::TPreparedBatchData TPortionDataAccessor::PrepareForAssemble(const ISnapshotSchema& dataSchema, + const ISnapshotSchema& resultSchema, THashMap& blobsData, + const std::optional& defaultSnapshot) const { + return PrepareForAssembleImpl(*this, *PortionInfo, dataSchema, resultSchema, blobsData, defaultSnapshot); +} + +TConclusion> TPortionDataAccessor::TPreparedColumn::AssembleAccessor() const { + Y_ABORT_UNLESS(!Blobs.empty()); + + NArrow::NAccessor::TCompositeChunkedArray::TBuilder builder(GetField()->type()); + for (auto& blob : Blobs) { + auto chunkedArray = blob.BuildRecordBatch(*Loader); + if (chunkedArray.IsFail()) { + return chunkedArray; + } + builder.AddChunk(chunkedArray.DetachResult()); + } + return builder.Finish(); +} + +std::shared_ptr TPortionDataAccessor::TPreparedColumn::AssembleForSeqAccess() const { + Y_ABORT_UNLESS(!Blobs.empty()); + + std::vector chunks; + chunks.reserve(Blobs.size()); + ui64 recordsCount = 0; + for (auto& blob : Blobs) { + chunks.push_back(blob.BuildDeserializeChunk(Loader)); + if (!!blob.GetData()) { + recordsCount += blob.GetExpectedRowsCountVerified(); + } else { + recordsCount += blob.GetDefaultRowsCount(); + } + } + + return std::make_shared(recordsCount, Loader, std::move(chunks)); +} + +NArrow::NAccessor::TDeserializeChunkedArray::TChunk TPortionDataAccessor::TAssembleBlobInfo::BuildDeserializeChunk( + const std::shared_ptr& loader) const { + if (DefaultRowsCount) { + Y_ABORT_UNLESS(!Data); + auto col = std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(loader->GetField()->type(), DefaultValue, DefaultRowsCount)); + return NArrow::NAccessor::TDeserializeChunkedArray::TChunk(col); + } else { + AFL_VERIFY(ExpectedRowsCount); + return NArrow::NAccessor::TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data); + } +} + +TConclusion> TPortionDataAccessor::TAssembleBlobInfo::BuildRecordBatch( + const TColumnLoader& loader) const { + if (DefaultRowsCount) { + Y_ABORT_UNLESS(!Data); + if (NeedCache) { + return std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(loader.GetField()->type(), DefaultValue, DefaultRowsCount)); + } else { + return std::make_shared( + NArrow::TStatusValidator::GetValid(arrow::MakeArrayFromScalar(*DefaultValue, DefaultRowsCount))); + } + } else { + AFL_VERIFY(ExpectedRowsCount); + return loader.ApplyConclusion(Data, *ExpectedRowsCount); + } +} + +TConclusion> TPortionDataAccessor::TPreparedBatchData::AssembleToGeneralContainer( + const std::set& sequentialColumnIds) const { + std::vector> columns; + std::vector> fields; + for (auto&& i : Columns) { + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("column", i.GetField()->ToString())("id", i.GetColumnId()); + if (sequentialColumnIds.contains(i.GetColumnId())) { + columns.emplace_back(i.AssembleForSeqAccess()); + } else { + auto conclusion = i.AssembleAccessor(); + if (conclusion.IsFail()) { + return conclusion; + } + columns.emplace_back(conclusion.DetachResult()); + } + fields.emplace_back(i.GetField()); + } + + return std::make_shared(fields, std::move(columns)); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.h b/ydb/core/tx/columnshard/engines/portions/data_accessor.h new file mode 100644 index 000000000000..3487b0f2c1f3 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.h @@ -0,0 +1,222 @@ +#pragma once +#include "portion_info.h" + +#include + +#include + +#include + +namespace NKikimr::NOlap { + +class TPortionDataAccessor { +private: + const TPortionInfo* PortionInfo; + +public: + TPortionDataAccessor(const TPortionInfo& portionInfo) + : PortionInfo(&portionInfo) + { + + } + + const std::vector& GetRecords() const { + return PortionInfo->Records; + } + + class TAssembleBlobInfo { + private: + YDB_READONLY_DEF(std::optional, ExpectedRowsCount); + ui32 DefaultRowsCount = 0; + std::shared_ptr DefaultValue; + TString Data; + const bool NeedCache = true; + + public: + ui32 GetExpectedRowsCountVerified() const { + AFL_VERIFY(ExpectedRowsCount); + return *ExpectedRowsCount; + } + + void SetExpectedRecordsCount(const ui32 expectedRowsCount) { + AFL_VERIFY(!ExpectedRowsCount); + ExpectedRowsCount = expectedRowsCount; + if (!Data) { + AFL_VERIFY(*ExpectedRowsCount == DefaultRowsCount); + } + } + + TAssembleBlobInfo(const ui32 rowsCount, const std::shared_ptr& defValue, const bool needCache = true) + : DefaultRowsCount(rowsCount) + , DefaultValue(defValue) + , NeedCache(needCache) { + AFL_VERIFY(DefaultRowsCount); + } + + TAssembleBlobInfo(const TString& data) + : Data(data) { + AFL_VERIFY(!!Data); + } + + ui32 GetDefaultRowsCount() const noexcept { + return DefaultRowsCount; + } + + const TString& GetData() const noexcept { + return Data; + } + + bool IsBlob() const { + return !DefaultRowsCount && !!Data; + } + + bool IsDefault() const { + return DefaultRowsCount && !Data; + } + + TConclusion> BuildRecordBatch(const TColumnLoader& loader) const; + NArrow::NAccessor::TDeserializeChunkedArray::TChunk BuildDeserializeChunk(const std::shared_ptr& loader) const; + }; + + class TPreparedColumn { + private: + std::shared_ptr Loader; + std::vector Blobs; + + public: + ui32 GetColumnId() const { + return Loader->GetColumnId(); + } + + const std::string& GetName() const { + return Loader->GetField()->name(); + } + + std::shared_ptr GetField() const { + return Loader->GetField(); + } + + TPreparedColumn(std::vector&& blobs, const std::shared_ptr& loader) + : Loader(loader) + , Blobs(std::move(blobs)) { + AFL_VERIFY(Loader); + } + + std::shared_ptr AssembleForSeqAccess() const; + TConclusion> AssembleAccessor() const; + }; + + class TPreparedBatchData { + private: + std::vector Columns; + size_t RowsCount = 0; + + public: + struct TAssembleOptions { + std::optional> IncludedColumnIds; + std::optional> ExcludedColumnIds; + std::map> ConstantColumnIds; + + bool IsConstantColumn(const ui32 columnId, std::shared_ptr& scalar) const { + if (ConstantColumnIds.empty()) { + return false; + } + auto it = ConstantColumnIds.find(columnId); + if (it == ConstantColumnIds.end()) { + return false; + } + scalar = it->second; + return true; + } + + bool IsAcceptedColumn(const ui32 columnId) const { + if (IncludedColumnIds && !IncludedColumnIds->contains(columnId)) { + return false; + } + if (ExcludedColumnIds && ExcludedColumnIds->contains(columnId)) { + return false; + } + return true; + } + }; + + std::shared_ptr GetFieldVerified(const ui32 columnId) const { + for (auto&& i : Columns) { + if (i.GetColumnId() == columnId) { + return i.GetField(); + } + } + AFL_VERIFY(false); + return nullptr; + } + + size_t GetColumnsCount() const { + return Columns.size(); + } + + size_t GetRowsCount() const { + return RowsCount; + } + + TPreparedBatchData(std::vector&& columns, const size_t rowsCount) + : Columns(std::move(columns)) + , RowsCount(rowsCount) { + } + + TConclusion> AssembleToGeneralContainer(const std::set& sequentialColumnIds) const; + }; + + class TColumnAssemblingInfo { + private: + std::vector BlobsInfo; + YDB_READONLY(ui32, ColumnId, 0); + const ui32 NumRows; + ui32 NumRowsByChunks = 0; + const std::shared_ptr DataLoader; + const std::shared_ptr ResultLoader; + + public: + TColumnAssemblingInfo( + const ui32 numRows, const std::shared_ptr& dataLoader, const std::shared_ptr& resultLoader) + : ColumnId(resultLoader->GetColumnId()) + , NumRows(numRows) + , DataLoader(dataLoader) + , ResultLoader(resultLoader) { + AFL_VERIFY(ResultLoader); + if (DataLoader) { + AFL_VERIFY(ResultLoader->GetColumnId() == DataLoader->GetColumnId()); + AFL_VERIFY(DataLoader->GetField()->IsCompatibleWith(ResultLoader->GetField()))("data", DataLoader->GetField()->ToString())( + "result", ResultLoader->GetField()->ToString()); + } + } + + const std::shared_ptr& GetField() const { + return ResultLoader->GetField(); + } + + void AddBlobInfo(const ui32 expectedChunkIdx, const ui32 expectedRecordsCount, TAssembleBlobInfo&& info) { + AFL_VERIFY(expectedChunkIdx == BlobsInfo.size()); + info.SetExpectedRecordsCount(expectedRecordsCount); + NumRowsByChunks += expectedRecordsCount; + BlobsInfo.emplace_back(std::move(info)); + } + + TPreparedColumn Compile() { + if (BlobsInfo.empty()) { + BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows, DataLoader ? DataLoader->GetDefaultValue() : ResultLoader->GetDefaultValue())); + return TPreparedColumn(std::move(BlobsInfo), ResultLoader); + } else { + AFL_VERIFY(NumRowsByChunks == NumRows)("by_chunks", NumRowsByChunks)("expected", NumRows); + AFL_VERIFY(DataLoader); + return TPreparedColumn(std::move(BlobsInfo), DataLoader); + } + } + }; + + TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, + THashMap& blobsData, const std::optional& defaultSnapshot = std::nullopt) const; + TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, + THashMap& blobsData, const std::optional& defaultSnapshot = std::nullopt) const; +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 757c89195b32..85d170b32cac 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -1,19 +1,13 @@ -#include "portion_info.h" +#include "column_record.h" #include "constructor.h" +#include "portion_info.h" + #include #include -#include #include #include #include #include -#include -#include -#include -#include -#include - -#include namespace NKikimr::NOlap { @@ -65,22 +59,26 @@ ui64 TPortionInfo::GetIndexRawBytes(const bool validation) const { TString TPortionInfo::DebugString(const bool withDetails) const { TStringBuilder sb; - sb << "(portion_id:" << PortionId << ";" << - "path_id:" << PathId << ";records_count:" << NumRows() << ";" - "min_schema_snapshot:(" << MinSnapshotDeprecated.DebugString() << ");" - "schema_version:" << SchemaVersion.value_or(0) << ";" - "level:" << GetMeta().GetCompactionLevel() << ";"; + sb << "(portion_id:" << PortionId << ";" + << "path_id:" << PathId << ";records_count:" << NumRows() + << ";" + "min_schema_snapshot:(" + << MinSnapshotDeprecated.DebugString() + << ");" + "schema_version:" + << SchemaVersion.value_or(0) + << ";" + "level:" + << GetMeta().GetCompactionLevel() << ";"; if (withDetails) { - sb << - "records_snapshot_min:(" << RecordSnapshotMin().DebugString() << ");" << - "records_snapshot_max:(" << RecordSnapshotMax().DebugString() << ");" << - "from:" << IndexKeyStart().DebugString() << ";" << - "to:" << IndexKeyEnd().DebugString() << ";"; - } - sb << - "column_size:" << GetColumnBlobBytes() << ";" << - "index_size:" << GetIndexBlobBytes() << ";" << - "meta:(" << Meta.DebugString() << ");"; + sb << "records_snapshot_min:(" << RecordSnapshotMin().DebugString() << ");" + << "records_snapshot_max:(" << RecordSnapshotMax().DebugString() << ");" + << "from:" << IndexKeyStart().DebugString() << ";" + << "to:" << IndexKeyEnd().DebugString() << ";"; + } + sb << "column_size:" << GetColumnBlobBytes() << ";" + << "index_size:" << GetIndexBlobBytes() << ";" + << "meta:(" << Meta.DebugString() << ");"; if (RemoveSnapshot.Valid()) { sb << "remove_snapshot:(" << RemoveSnapshot.DebugString() << ");"; } @@ -141,12 +139,10 @@ std::vector TPortionInfo::BuildPages() cons TPart(const TColumnRecord* record, const ui32 recordsCount) : Record(record) , RecordsCount(recordsCount) { - } TPart(const TIndexChunk* record, const ui32 recordsCount) : Index(record) , RecordsCount(recordsCount) { - } }; std::map> entities; @@ -203,8 +199,8 @@ std::vector TPortionInfo::BuildPages() cons } ui64 TPortionInfo::GetMetadataMemorySize() const { - return sizeof(TPortionInfo) + Records.size() * (sizeof(TColumnRecord) + 8) + Indexes.size() * sizeof(TIndexChunk) + BlobIds.size() * sizeof(TUnifiedBlobId) - - sizeof(TPortionMeta) + Meta.GetMetadataMemorySize(); + return sizeof(TPortionInfo) + Records.size() * (sizeof(TColumnRecord) + 8) + Indexes.size() * sizeof(TIndexChunk) + + BlobIds.size() * sizeof(TUnifiedBlobId) - sizeof(TPortionMeta) + Meta.GetMetadataMemorySize(); } ui64 TPortionInfo::GetTxVolume() const { @@ -289,7 +285,8 @@ TConclusion TPortionInfo::BuildFromProto( return result; } -THashMap TPortionInfo::DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const { +THashMap TPortionInfo::DecodeBlobAddresses( + NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const { THashMap result; for (auto&& i : blobs) { for (auto&& b : i.second) { @@ -368,7 +365,8 @@ ISnapshotSchema::TPtr TPortionInfo::GetSchema(const TVersionedIndex& index) cons void TPortionInfo::FillBlobRangesByStorage(THashMap>& result, const TIndexInfo& indexInfo) const { for (auto&& i : Records) { const TString& storageId = GetColumnStorageId(i.GetColumnId(), indexInfo); - AFL_VERIFY(result[storageId].emplace(RestoreBlobRange(i.GetBlobRange())).second)("blob_id", RestoreBlobRange(i.GetBlobRange()).ToString()); + AFL_VERIFY(result[storageId].emplace(RestoreBlobRange(i.GetBlobRange())).second)( + "blob_id", RestoreBlobRange(i.GetBlobRange()).ToString()); } for (auto&& i : Indexes) { const TString& storageId = GetIndexStorageId(i.GetIndexId(), indexInfo); @@ -435,11 +433,13 @@ void TPortionInfo::FillBlobIdsByStorage(THashMapGetIndexInfo()); } -THashMap>> TPortionInfo::RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { +THashMap>> TPortionInfo::RestoreEntityChunks( + NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { THashMap>> result; for (auto&& c : GetRecords()) { const TString& storageId = GetColumnStorageId(c.GetColumnId(), indexInfo); - auto chunk = std::make_shared(blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId())); + auto chunk = std::make_shared( + blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId())); chunk->SetChunkIdx(c.GetChunkIdx()); AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); } @@ -599,85 +599,6 @@ ui64 TPortionInfo::GetMinMemoryForReadColumns(const std::optional return maxRawBytes; } -namespace { -template -TPortionInfo::TPreparedBatchData PrepareForAssembleImpl(const TPortionInfo& portion, const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, - THashMap& blobsData, const std::optional& defaultSnapshot) { - std::vector columns; - columns.reserve(resultSchema.GetColumnIds().size()); - const ui32 rowsCount = portion.GetRecordsCount(); - for (auto&& i : resultSchema.GetColumnIds()) { - columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i), resultSchema.GetColumnLoaderVerified(i)); - if (portion.HasInsertWriteId()) { - if (portion.HasCommitSnapshot()) { - if (i == (ui32)IIndexInfo::ESpecialColumn::PLAN_STEP) { - columns.back().AddBlobInfo(0, portion.GetRecordsCount(), - TPortionInfo::TAssembleBlobInfo(portion.GetRecordsCount(), - std::make_shared(portion.GetCommitSnapshotVerified().GetPlanStep()), false)); - } - if (i == (ui32)IIndexInfo::ESpecialColumn::TX_ID) { - columns.back().AddBlobInfo(0, portion.GetRecordsCount(), - TPortionInfo::TAssembleBlobInfo(portion.GetRecordsCount(), - std::make_shared(portion.GetCommitSnapshotVerified().GetPlanStep()), false)); - } - } else { - if (i == (ui32)IIndexInfo::ESpecialColumn::PLAN_STEP) { - columns.back().AddBlobInfo(0, portion.GetRecordsCount(), - TPortionInfo::TAssembleBlobInfo(portion.GetRecordsCount(), std::make_shared(defaultSnapshot ? defaultSnapshot->GetPlanStep() : 0))); - } - if (i == (ui32)IIndexInfo::ESpecialColumn::TX_ID) { - columns.back().AddBlobInfo(0, portion.GetRecordsCount(), - TPortionInfo::TAssembleBlobInfo(portion.GetRecordsCount(), - std::make_shared(defaultSnapshot ? defaultSnapshot->GetTxId() : 0))); - } - } - if (i == (ui32)IIndexInfo::ESpecialColumn::WRITE_ID) { - columns.back().AddBlobInfo(0, portion.GetRecordsCount(), - TPortionInfo::TAssembleBlobInfo( - portion.GetRecordsCount(), std::make_shared((ui64)portion.GetInsertWriteIdVerified()), false)); - } - if (i == (ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG) { - columns.back().AddBlobInfo(0, portion.GetRecordsCount(), - TPortionInfo::TAssembleBlobInfo( - portion.GetRecordsCount(), std::make_shared((bool)portion.GetMeta().GetDeletionsCount()), true)); - } - } - } - { - int skipColumnId = -1; - TPortionInfo::TColumnAssemblingInfo* currentAssembler = nullptr; - for (auto& rec : portion.GetRecords()) { - if (skipColumnId == (int)rec.ColumnId) { - continue; - } - if (!currentAssembler || rec.ColumnId != currentAssembler->GetColumnId()) { - const i32 resultPos = resultSchema.GetFieldIndex(rec.ColumnId); - if (resultPos < 0) { - skipColumnId = rec.ColumnId; - continue; - } - AFL_VERIFY((ui32)resultPos < columns.size()); - currentAssembler = &columns[resultPos]; - } - auto it = blobsData.find(rec.GetAddress()); - AFL_VERIFY(it != blobsData.end())("size", blobsData.size())("address", rec.GetAddress().DebugString()); - currentAssembler->AddBlobInfo(rec.Chunk, rec.GetMeta().GetNumRows(), std::move(it->second)); - blobsData.erase(it); - } - } - - // Make chunked arrays for columns - std::vector preparedColumns; - preparedColumns.reserve(columns.size()); - for (auto& c : columns) { - preparedColumns.emplace_back(c.Compile()); - } - - return TPortionInfo::TPreparedBatchData(std::move(preparedColumns), rowsCount); -} - -} - ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoConstructor& portion) { if (!CurrentSchema || portion.GetMinSnapshotDeprecatedVerified() != LastSnapshot) { CurrentSchema = portion.GetSchema(VersionedIndex); @@ -687,16 +608,6 @@ ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoC return CurrentSchema; } -TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, - THashMap& blobsData, const std::optional& defaultSnapshot) const { - return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData, defaultSnapshot); -} - -TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, - THashMap& blobsData, const std::optional& defaultSnapshot) const { - return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData, defaultSnapshot); -} - bool TPortionInfo::NeedShardingFilter(const TGranuleShardingInfo& shardingInfo) const { if (ShardingVersion && shardingInfo.GetSnapshotVersion() <= *ShardingVersion) { return false; @@ -728,86 +639,4 @@ void TPortionInfo::Precalculate() { } } -TConclusion> TPortionInfo::TPreparedColumn::AssembleAccessor() const { - Y_ABORT_UNLESS(!Blobs.empty()); - - NArrow::NAccessor::TCompositeChunkedArray::TBuilder builder(GetField()->type()); - for (auto& blob : Blobs) { - auto chunkedArray = blob.BuildRecordBatch(*Loader); - if (chunkedArray.IsFail()) { - return chunkedArray; - } - builder.AddChunk(chunkedArray.DetachResult()); - } - return builder.Finish(); -} - -std::shared_ptr TPortionInfo::TPreparedColumn::AssembleForSeqAccess() const { - Y_ABORT_UNLESS(!Blobs.empty()); - - std::vector chunks; - chunks.reserve(Blobs.size()); - ui64 recordsCount = 0; - for (auto& blob : Blobs) { - chunks.push_back(blob.BuildDeserializeChunk(Loader)); - if (!!blob.GetData()) { - recordsCount += blob.GetExpectedRowsCountVerified(); - } else { - recordsCount += blob.GetDefaultRowsCount(); - } - } - - return std::make_shared(recordsCount, Loader, std::move(chunks)); -} - -NArrow::NAccessor::TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobInfo::BuildDeserializeChunk( - const std::shared_ptr& loader) const { - if (DefaultRowsCount) { - Y_ABORT_UNLESS(!Data); - auto col = std::make_shared( - NArrow::TThreadSimpleArraysCache::Get(loader->GetField()->type(), DefaultValue, DefaultRowsCount)); - return NArrow::NAccessor::TDeserializeChunkedArray::TChunk(col); - } else { - AFL_VERIFY(ExpectedRowsCount); - return NArrow::NAccessor::TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data); - } -} - -TConclusion> TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const TColumnLoader& loader) const { - if (DefaultRowsCount) { - Y_ABORT_UNLESS(!Data); - if (NeedCache) { - return std::make_shared( - NArrow::TThreadSimpleArraysCache::Get(loader.GetField()->type(), DefaultValue, DefaultRowsCount)); - } else { - return std::make_shared( - NArrow::TStatusValidator::GetValid(arrow::MakeArrayFromScalar(*DefaultValue, DefaultRowsCount))); - } - } else { - AFL_VERIFY(ExpectedRowsCount); - return loader.ApplyConclusion(Data, *ExpectedRowsCount); - } -} - -TConclusion> TPortionInfo::TPreparedBatchData::AssembleToGeneralContainer( - const std::set& sequentialColumnIds) const { - std::vector> columns; - std::vector> fields; - for (auto&& i : Columns) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("column", i.GetField()->ToString())("id", i.GetColumnId()); - if (sequentialColumnIds.contains(i.GetColumnId())) { - columns.emplace_back(i.AssembleForSeqAccess()); - } else { - auto conclusion = i.AssembleAccessor(); - if (conclusion.IsFail()) { - return conclusion; - } - columns.emplace_back(conclusion.DetachResult()); - } - fields.emplace_back(i.GetField()); - } - - return std::make_shared(fields, std::move(columns)); -} - -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 7c408471ef40..d275b64c9716 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -1,19 +1,17 @@ #pragma once #include "column_record.h" +#include "common.h" #include "index_chunk.h" #include "meta.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include -#include +#include +#include + +#include namespace NKikimrColumnShardDataSharingProto { class TPortionInfo; @@ -36,6 +34,7 @@ class TEntityChunk { YDB_READONLY(ui32, RecordsCount, 0); YDB_READONLY(ui64, RawBytes, 0); YDB_READONLY_DEF(TBlobRangeLink16, BlobRange); + public: const TChunkAddress& GetAddress() const { return Address; @@ -45,29 +44,31 @@ class TEntityChunk { : Address(address) , RecordsCount(recordsCount) , RawBytes(rawBytesSize) - , BlobRange(blobRange) - { - + , BlobRange(blobRange) { } }; class TPortionInfoConstructor; class TGranuleShardingInfo; +class TPortionDataAccessor; class TPortionInfo { public: using TRuntimeFeatures = ui8; - enum class ERuntimeFeature: TRuntimeFeatures { + enum class ERuntimeFeature : TRuntimeFeatures { Optimized = 1 /* "optimized" */ }; + private: + friend class TPortionDataAccessor; + friend class TPortionInfoConstructor; + ui64 PrecalculatedColumnRawBytes = 0; ui64 PrecalculatedColumnBlobBytes = 0; bool Precalculated = false; void Precalculate(); - friend class TPortionInfoConstructor; TPortionInfo(TPortionMeta&& meta) : Meta(std::move(meta)) { if (HasInsertWriteId()) { @@ -79,8 +80,9 @@ class TPortionInfo { ui64 PathId = 0; ui64 PortionId = 0; // Id of independent (overlayed by PK) portion of data in pathId - TSnapshot MinSnapshotDeprecated = TSnapshot::Zero(); // {PlanStep, TxId} is min snapshot for {Granule, Portion} - TSnapshot RemoveSnapshot = TSnapshot::Zero(); // {XPlanStep, XTxId} is snapshot where the blob has been removed (i.e. compacted into another one) + TSnapshot MinSnapshotDeprecated = TSnapshot::Zero(); // {PlanStep, TxId} is min snapshot for {Granule, Portion} + TSnapshot RemoveSnapshot = + TSnapshot::Zero(); // {XPlanStep, XTxId} is snapshot where the blob has been removed (i.e. compacted into another one) std::optional SchemaVersion; std::optional ShardingVersion; @@ -108,7 +110,8 @@ class TPortionInfo { } template - static void AggregateIndexChunksData(const TAggregator& aggr, const std::vector& chunks, const std::set* columnIds, const bool validation) { + static void AggregateIndexChunksData( + const TAggregator& aggr, const std::vector& chunks, const std::set* columnIds, const bool validation) { if (columnIds) { auto itColumn = columnIds->begin(); auto itRecord = chunks.begin(); @@ -117,7 +120,8 @@ class TPortionInfo { if (itRecord->GetEntityId() < *itColumn) { ++itRecord; } else if (*itColumn < itRecord->GetEntityId()) { - AFL_VERIFY(!validation || recordsInEntityCount)("problem", "validation")("reason", "no_chunks_for_column")("column_id", *itColumn); + AFL_VERIFY(!validation || recordsInEntityCount)("problem", "validation")("reason", "no_chunks_for_column")( + "column_id", *itColumn); ++itColumn; recordsInEntityCount = 0; } else { @@ -254,7 +258,8 @@ class TPortionInfo { void ReorderChunks(); - THashMap>> RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; + THashMap>> RestoreEntityChunks( + NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; const TBlobRange RestoreBlobRange(const TBlobRangeLink16& linkRange) const { return linkRange.RestoreRange(GetBlobId(linkRange.GetBlobIdxVerified())); @@ -275,7 +280,7 @@ class TPortionInfo { const TString& GetIndexStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const; const TString& GetEntityStorageId(const ui32 entityId, const TIndexInfo& indexInfo) const; - ui64 GetTxVolume() const; // fake-correct method for determ volume on rewrite this portion in transaction progress + ui64 GetTxVolume() const; // fake-correct method for determ volume on rewrite this portion in transaction progress ui64 GetMetadataMemorySize() const; class TPage { @@ -283,13 +288,12 @@ class TPortionInfo { YDB_READONLY_DEF(std::vector, Records); YDB_READONLY_DEF(std::vector, Indexes); YDB_READONLY(ui32, RecordsCount, 0); + public: TPage(std::vector&& records, std::vector&& indexes, const ui32 recordsCount) : Records(std::move(records)) , Indexes(std::move(indexes)) - , RecordsCount(recordsCount) - { - + , RecordsCount(recordsCount) { } }; @@ -420,8 +424,12 @@ class TPortionInfo { return false; } - bool ValidSnapshotInfo() const { return MinSnapshotDeprecated.Valid() && PathId && PortionId; } - size_t NumChunks() const { return Records.size(); } + bool ValidSnapshotInfo() const { + return MinSnapshotDeprecated.Valid() && PathId && PortionId; + } + size_t NumChunks() const { + return Records.size(); + } TString DebugString(const bool withDetails = false) const; @@ -461,7 +469,6 @@ class TPortionInfo { PortionId = id; } - const TSnapshot& GetMinSnapshotDeprecated() const { return MinSnapshotDeprecated; } @@ -492,7 +499,8 @@ class TPortionInfo { const bool visible = (Meta.RecordSnapshotMin <= snapshot) && (!RemoveSnapshot.Valid() || snapshot < RemoveSnapshot) && (!checkCommitSnapshot || !CommitSnapshot || *CommitSnapshot <= snapshot); - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "IsVisible")("analyze_portion", DebugString())("visible", visible)("snapshot", snapshot.DebugString()); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "IsVisible")("analyze_portion", DebugString())("visible", visible)( + "snapshot", snapshot.DebugString()); return visible; } @@ -530,7 +538,6 @@ class TPortionInfo { } } - THashMap> GetBlobIdsByStorage(const TIndexInfo& indexInfo) const { THashMap> result; FillBlobIdsByStorage(result, indexInfo); @@ -541,10 +548,11 @@ class TPortionInfo { const NOlap::TVersionedIndex& VersionedIndex; ISnapshotSchema::TPtr CurrentSchema; TSnapshot LastSnapshot = TSnapshot::Zero(); + public: TSchemaCursor(const NOlap::TVersionedIndex& versionedIndex) - : VersionedIndex(versionedIndex) - {} + : VersionedIndex(versionedIndex) { + } ISnapshotSchema::TPtr GetSchema(const TPortionInfoConstructor& portion); @@ -617,197 +625,8 @@ class TPortionInfo { ui64 GetTotalRawBytes() const { return GetColumnRawBytes() + GetIndexRawBytes(); } -public: - class TAssembleBlobInfo { - private: - YDB_READONLY_DEF(std::optional, ExpectedRowsCount); - ui32 DefaultRowsCount = 0; - std::shared_ptr DefaultValue; - TString Data; - const bool NeedCache = true; - public: - ui32 GetExpectedRowsCountVerified() const { - AFL_VERIFY(ExpectedRowsCount); - return *ExpectedRowsCount; - } - - void SetExpectedRecordsCount(const ui32 expectedRowsCount) { - AFL_VERIFY(!ExpectedRowsCount); - ExpectedRowsCount = expectedRowsCount; - if (!Data) { - AFL_VERIFY(*ExpectedRowsCount == DefaultRowsCount); - } - } - - TAssembleBlobInfo(const ui32 rowsCount, const std::shared_ptr& defValue, const bool needCache = true) - : DefaultRowsCount(rowsCount) - , DefaultValue(defValue) - , NeedCache(needCache) - { - AFL_VERIFY(DefaultRowsCount); - } - - TAssembleBlobInfo(const TString& data) - : Data(data) { - AFL_VERIFY(!!Data); - } - - ui32 GetDefaultRowsCount() const noexcept { - return DefaultRowsCount; - } - - const TString& GetData() const noexcept { - return Data; - } - - bool IsBlob() const { - return !DefaultRowsCount && !!Data; - } - - bool IsDefault() const { - return DefaultRowsCount && !Data; - } - - TConclusion> BuildRecordBatch(const TColumnLoader& loader) const; - NArrow::NAccessor::TDeserializeChunkedArray::TChunk BuildDeserializeChunk(const std::shared_ptr& loader) const; - }; - - class TPreparedColumn { - private: - std::shared_ptr Loader; - std::vector Blobs; - public: - ui32 GetColumnId() const { - return Loader->GetColumnId(); - } - - const std::string& GetName() const { - return Loader->GetField()->name(); - } - - std::shared_ptr GetField() const { - return Loader->GetField(); - } - - TPreparedColumn(std::vector&& blobs, const std::shared_ptr& loader) - : Loader(loader) - , Blobs(std::move(blobs)) { - AFL_VERIFY(Loader); - } - - std::shared_ptr AssembleForSeqAccess() const; - TConclusion> AssembleAccessor() const; - }; - - class TPreparedBatchData { - private: - std::vector Columns; - size_t RowsCount = 0; - public: - struct TAssembleOptions { - std::optional> IncludedColumnIds; - std::optional> ExcludedColumnIds; - std::map> ConstantColumnIds; - - bool IsConstantColumn(const ui32 columnId, std::shared_ptr& scalar) const { - if (ConstantColumnIds.empty()) { - return false; - } - auto it = ConstantColumnIds.find(columnId); - if (it == ConstantColumnIds.end()) { - return false; - } - scalar = it->second; - return true; - } - - bool IsAcceptedColumn(const ui32 columnId) const { - if (IncludedColumnIds && !IncludedColumnIds->contains(columnId)) { - return false; - } - if (ExcludedColumnIds && ExcludedColumnIds->contains(columnId)) { - return false; - } - return true; - } - }; - - std::shared_ptr GetFieldVerified(const ui32 columnId) const { - for (auto&& i : Columns) { - if (i.GetColumnId() == columnId) { - return i.GetField(); - } - } - AFL_VERIFY(false); - return nullptr; - } - - size_t GetColumnsCount() const { - return Columns.size(); - } - - size_t GetRowsCount() const { - return RowsCount; - } - - TPreparedBatchData(std::vector&& columns, const size_t rowsCount) - : Columns(std::move(columns)) - , RowsCount(rowsCount) { - } - - TConclusion> AssembleToGeneralContainer(const std::set& sequentialColumnIds) const; - }; - - class TColumnAssemblingInfo { - private: - std::vector BlobsInfo; - YDB_READONLY(ui32, ColumnId, 0); - const ui32 NumRows; - ui32 NumRowsByChunks = 0; - const std::shared_ptr DataLoader; - const std::shared_ptr ResultLoader; - public: - TColumnAssemblingInfo(const ui32 numRows, const std::shared_ptr& dataLoader, const std::shared_ptr& resultLoader) - : ColumnId(resultLoader->GetColumnId()) - , NumRows(numRows) - , DataLoader(dataLoader) - , ResultLoader(resultLoader) { - AFL_VERIFY(ResultLoader); - if (DataLoader) { - AFL_VERIFY(ResultLoader->GetColumnId() == DataLoader->GetColumnId()); - AFL_VERIFY(DataLoader->GetField()->IsCompatibleWith(ResultLoader->GetField()))("data", DataLoader->GetField()->ToString())("result", ResultLoader->GetField()->ToString()); - } - } - - const std::shared_ptr& GetField() const { - return ResultLoader->GetField(); - } - - void AddBlobInfo(const ui32 expectedChunkIdx, const ui32 expectedRecordsCount, TAssembleBlobInfo&& info) { - AFL_VERIFY(expectedChunkIdx == BlobsInfo.size()); - info.SetExpectedRecordsCount(expectedRecordsCount); - NumRowsByChunks += expectedRecordsCount; - BlobsInfo.emplace_back(std::move(info)); - } - - TPreparedColumn Compile() { - if (BlobsInfo.empty()) { - BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows, DataLoader ? DataLoader->GetDefaultValue() : ResultLoader->GetDefaultValue())); - return TPreparedColumn(std::move(BlobsInfo), ResultLoader); - } else { - AFL_VERIFY(NumRowsByChunks == NumRows)("by_chunks", NumRowsByChunks)("expected", NumRows); - AFL_VERIFY(DataLoader); - return TPreparedColumn(std::move(BlobsInfo), DataLoader); - } - } - }; - - TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, - THashMap& blobsData, const std::optional& defaultSnapshot = std::nullopt) const; - TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, - THashMap& blobsData, const std::optional& defaultSnapshot = std::nullopt) const; - friend IOutputStream& operator << (IOutputStream& out, const TPortionInfo& info) { + friend IOutputStream& operator<<(IOutputStream& out, const TPortionInfo& info) { out << info.DebugString(); return out; } @@ -819,4 +638,4 @@ static_assert(std::is_nothrow_move_assignable::value); /// Ensure that TPortionInfo can be effectively constructed by moving the value. static_assert(std::is_nothrow_move_constructible::value); -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index 67b94cb1ba45..d27f12498951 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -1,5 +1,7 @@ +#include "data_accessor.h" #include "read_with_blobs.h" #include "write_with_blobs.h" + #include #include #include @@ -20,12 +22,14 @@ TConclusion> TReadPortionInfoWithBlob blobs[i.GetAddress()] = GetBlobByAddressVerified(i.ColumnId, i.Chunk); Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); } - return PortionInfo.PrepareForAssemble(data, resultSchema, blobs).AssembleToGeneralContainer(seqColumns); + return TPortionDataAccessor(PortionInfo).PrepareForAssemble(data, resultSchema, blobs).AssembleToGeneralContainer(seqColumns); } -NKikimr::NOlap::TReadPortionInfoWithBlobs TReadPortionInfoWithBlobs::RestorePortion(const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) { +NKikimr::NOlap::TReadPortionInfoWithBlobs TReadPortionInfoWithBlobs::RestorePortion( + const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) { TReadPortionInfoWithBlobs result(portion); - THashMap>> records = result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); + THashMap>> records = + result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); for (auto&& [storageId, chunksByAddress] : records) { for (auto&& [_, chunk] : chunksByAddress) { result.RestoreChunk(chunk); @@ -34,8 +38,8 @@ NKikimr::NOlap::TReadPortionInfoWithBlobs TReadPortionInfoWithBlobs::RestorePort return result; } -std::vector TReadPortionInfoWithBlobs::RestorePortions(const std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, - const TVersionedIndex& tables) { +std::vector TReadPortionInfoWithBlobs::RestorePortions( + const std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TVersionedIndex& tables) { std::vector result; for (auto&& i : portions) { const auto schema = i.GetSchema(tables); @@ -59,7 +63,8 @@ std::vector> TReadPortionInfoWithBlobs::GetEn return result; } -bool TReadPortionInfoWithBlobs::ExtractColumnChunks(const ui32 entityId, std::vector& records, std::vector>& chunks) { +bool TReadPortionInfoWithBlobs::ExtractColumnChunks( + const ui32 entityId, std::vector& records, std::vector>& chunks) { records = GetPortionInfo().GetColumnChunksPointers(entityId); if (records.empty()) { return false; @@ -79,8 +84,8 @@ bool TReadPortionInfoWithBlobs::ExtractColumnChunks(const ui32 entityId, std::ve } std::optional TReadPortionInfoWithBlobs::SyncPortion(TReadPortionInfoWithBlobs&& source, - const ISnapshotSchema::TPtr& from, const ISnapshotSchema::TPtr& to, const TString& targetTier, const std::shared_ptr& storages, - std::shared_ptr counters) { + const ISnapshotSchema::TPtr& from, const ISnapshotSchema::TPtr& to, const TString& targetTier, + const std::shared_ptr& storages, std::shared_ptr counters) { if (from->GetVersion() == to->GetVersion() && targetTier == source.GetPortionInfo().GetTierNameDef(IStoragesManager::DefaultStorageId)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "we don't need sync portion"); return {}; @@ -133,4 +138,4 @@ const TString& TReadPortionInfoWithBlobs::GetBlobByAddressVerified(const ui32 co return it->second->GetData(); } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/ya.make b/ydb/core/tx/columnshard/engines/portions/ya.make index ced1ad706c50..8619f322b52f 100644 --- a/ydb/core/tx/columnshard/engines/portions/ya.make +++ b/ydb/core/tx/columnshard/engines/portions/ya.make @@ -11,6 +11,7 @@ SRCS( meta.cpp common.cpp index_chunk.cpp + data_accessor.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h index b535c2bc4673..4633859c4651 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -16,7 +17,7 @@ namespace NKikimr::NOlap { class TFetchedData { protected: - using TBlobs = THashMap; + using TBlobs = THashMap; YDB_ACCESSOR_DEF(TBlobs, Blobs); YDB_READONLY_DEF(std::shared_ptr, Table); YDB_READONLY_DEF(std::shared_ptr, Filter); @@ -59,7 +60,7 @@ class TFetchedData { } } - void AddDefaults(THashMap&& blobs) { + void AddDefaults(THashMap&& blobs) { for (auto&& i : blobs) { AFL_VERIFY(Blobs.emplace(i.first, std::move(i.second)).second); } @@ -103,7 +104,6 @@ class TFetchedData { } else { AddFilter(*filter); } - } void AddFilter(const NArrow::TColumnFilter& filter) { diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index ab93985e6d1d..310ee70f169d 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -4,13 +4,15 @@ #include "plain_read_data.h" #include "source.h" -#include #include #include +#include #include #include #include +#include + namespace NKikimr::NOlap::NReader::NPlain { void IDataSource::InitFetchingPlan(const std::shared_ptr& fetching) { @@ -52,7 +54,7 @@ void IDataSource::SetIsReady() { } void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, - THashMap& defaultBlocks, const std::shared_ptr& filter) { + THashMap& defaultBlocks, const std::shared_ptr& filter) { const NArrow::TColumnFilter& cFilter = filter ? *filter : NArrow::TColumnFilter::BuildAllowFilter(); ui32 fetchedChunks = 0; ui32 nullChunks = 0; @@ -71,8 +73,8 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob reading->AddRange(Portion->RestoreBlobRange(c->BlobRange)); ++fetchedChunks; } else { - defaultBlocks.emplace(c->GetAddress(), - TPortionInfo::TAssembleBlobInfo(c->GetMeta().GetNumRows(), Schema->GetExternalDefaultValueVerified(c->GetColumnId()))); + defaultBlocks.emplace(c->GetAddress(), TPortionDataAccessor::TAssembleBlobInfo(c->GetMeta().GetNumRows(), + Schema->GetExternalDefaultValueVerified(c->GetColumnId()))); ++nullChunks; } itFinished = !itFilter.Next(c->GetMeta().GetNumRows()); @@ -93,7 +95,7 @@ bool TPortionDataSource::DoStartFetchingColumns( TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); { - THashMap nullBlocks; + THashMap nullBlocks; NeedFetchColumns(columnIds, action, nullBlocks, StageData->GetAppliedFilter()); StageData->AddDefaults(std::move(nullBlocks)); } @@ -198,8 +200,10 @@ void TPortionDataSource::DoAssembleColumns(const std::shared_ptr& c } } - auto batch = Portion->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs(), ss) - .AssembleToGeneralContainer(SequentialEntityIds).DetachResult(); + auto batch = TPortionDataAccessor(Portion.get()) + ->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs(), ss) + .AssembleToGeneralContainer(SequentialEntityIds) + .DetachResult(); MutableStageData().AddBatch(batch); } @@ -226,7 +230,8 @@ bool TCommittedDataSource::DoStartFetchingColumns( void TCommittedDataSource::DoAssembleColumns(const std::shared_ptr& columns) { TMemoryProfileGuard mGuard("SCAN_PROFILE::ASSEMBLER::COMMITTED", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); - const ISnapshotSchema::TPtr batchSchema = GetContext()->GetReadMetadata()->GetIndexVersions().GetSchemaVerified(GetCommitted().GetSchemaVersion()); + const ISnapshotSchema::TPtr batchSchema = + GetContext()->GetReadMetadata()->GetIndexVersions().GetSchemaVerified(GetCommitted().GetSchemaVersion()); const ISnapshotSchema::TPtr resultSchema = GetContext()->GetReadMetadata()->GetResultSchema(); if (!GetStageData().GetTable()) { AFL_VERIFY(GetStageData().GetBlobs().size() == 1); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index 80755276ea5e..893f143a4617 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -269,7 +269,7 @@ class TPortionDataSource: public IDataSource { mutable THashMap FingerprintedData; void NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, - THashMap& nullBlocks, const std::shared_ptr& filter); + THashMap& nullBlocks, const std::shared_ptr& filter); virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexChecker) override; virtual bool DoStartFetchingColumns( From 7b55d22e4cdaaf989c25ac5560ab9f7cb7351957 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 13:17:49 +0300 Subject: [PATCH 14/31] fix --- .../columnshard/normalizer/portion/chunks.cpp | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp index 6901760d5e55..53004cb86752 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp @@ -1,16 +1,17 @@ #include "chunks.h" #include "normalizer.h" +#include +#include #include #include -#include - namespace NKikimr::NOlap { class TChunksNormalizer::TNormalizerResult: public INormalizerChanges { std::vector Chunks; std::shared_ptr> Schemas; + public: TNormalizerResult(std::vector&& chunks) : Chunks(std::move(chunks)) { @@ -21,17 +22,15 @@ class TChunksNormalizer::TNormalizerResult: public INormalizerChanges { NIceDb::TNiceDb db(txc.DB); for (auto&& chunkInfo : Chunks) { - NKikimrTxColumnShard::TIndexColumnMeta metaProto = chunkInfo.GetMetaProto(); metaProto.SetNumRows(chunkInfo.GetUpdate().GetNumRows()); metaProto.SetRawBytes(chunkInfo.GetUpdate().GetRawBytes()); const auto& key = chunkInfo.GetKey(); - db.Table().Key(key.GetIndex(), key.GetGranule(), key.GetColumnIdx(), - key.GetPlanStep(), key.GetTxId(), key.GetPortion(), key.GetChunk()).Update( - NIceDb::TUpdate(metaProto.SerializeAsString()) - ); + db.Table() + .Key(key.GetIndex(), key.GetGranule(), key.GetColumnIdx(), key.GetPlanStep(), key.GetTxId(), key.GetPortion(), key.GetChunk()) + .Update(NIceDb::TUpdate(metaProto.SerializeAsString())); } return true; } @@ -44,10 +43,12 @@ class TChunksNormalizer::TNormalizerResult: public INormalizerChanges { class TRowsAndBytesChangesTask: public NConveyor::ITask { public: using TDataContainer = std::vector; + private: NBlobOperations::NRead::TCompositeReadBlobs Blobs; std::vector Chunks; TNormalizationContext NormContext; + protected: virtual TConclusionStatus DoExecute(const std::shared_ptr& /*taskPtr*/) override { for (auto&& chunkInfo : Chunks) { @@ -58,7 +59,7 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { auto columnLoader = chunkInfo.GetLoader(); Y_ABORT_UNLESS(!!columnLoader); - TPortionInfo::TAssembleBlobInfo assembleBlob(blobData); + TPortionDataAccessor::TAssembleBlobInfo assembleBlob(blobData); assembleBlob.SetExpectedRecordsCount(chunkInfo.GetRecordsCount()); auto batch = assembleBlob.BuildRecordBatch(*columnLoader).DetachResult(); Y_ABORT_UNLESS(!!batch); @@ -68,16 +69,18 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { } auto changes = std::make_shared(std::move(Chunks)); - TActorContext::AsActorContext().Send(NormContext.GetShardActor(), std::make_unique(changes)); + TActorContext::AsActorContext().Send( + NormContext.GetShardActor(), std::make_unique(changes)); return TConclusionStatus::Success(); } public: - TRowsAndBytesChangesTask(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TNormalizationContext& nCtx, std::vector&& chunks, std::shared_ptr>) + TRowsAndBytesChangesTask(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TNormalizationContext& nCtx, + std::vector&& chunks, std::shared_ptr>) : Blobs(std::move(blobs)) , Chunks(std::move(chunks)) - , NormContext(nCtx) - {} + , NormContext(nCtx) { + } virtual TString GetTaskClassIdentifier() const override { const static TString name = "TRowsAndBytesChangesTask"; @@ -97,7 +100,8 @@ void TChunksNormalizer::TChunkInfo::InitSchema(const NColumnShard::TTablesManage Schema = tm.GetPrimaryIndexSafe().GetVersionedIndex().GetSchema(NOlap::TSnapshot(Key.GetPlanStep(), Key.GetTxId())); } -TConclusion> TChunksNormalizer::DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { +TConclusion> TChunksNormalizer::DoInit( + const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { using namespace NColumnShard; NIceDb::TNiceDb db(txc.DB); @@ -160,4 +164,4 @@ TConclusion> TChunksNormalizer::DoInit(const return tasks; } -} +} // namespace NKikimr::NOlap From a572c4124a85eb5578fbacc5f50fcdfdba86bff2 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 13:18:42 +0300 Subject: [PATCH 15/31] fix --- .../columnshard/engines/reader/plain_reader/iterator/source.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index 310ee70f169d..14985da88ed5 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -200,7 +200,7 @@ void TPortionDataSource::DoAssembleColumns(const std::shared_ptr& c } } - auto batch = TPortionDataAccessor(Portion.get()) + auto batch = TPortionDataAccessor(*Portion) ->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs(), ss) .AssembleToGeneralContainer(SequentialEntityIds) .DetachResult(); From dcb1363b8fb911f6312f7473b4463c7a795d8c16 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 13:21:47 +0300 Subject: [PATCH 16/31] fix build --- .../columnshard/engines/reader/plain_reader/iterator/source.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index 14985da88ed5..d282f0853dea 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -201,7 +201,7 @@ void TPortionDataSource::DoAssembleColumns(const std::shared_ptr& c } auto batch = TPortionDataAccessor(*Portion) - ->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs(), ss) + .PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs(), ss) .AssembleToGeneralContainer(SequentialEntityIds) .DetachResult(); From 0222073cf99182e47560b599e4c511dfa0cef11c Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 15:37:47 +0300 Subject: [PATCH 17/31] TPortionDataAccessor logic into separated class --- .../destination/events/transfer.cpp | 15 +- .../destination/session/destination.cpp | 27 ++- .../transactions/tx_data_ack_to_source.cpp | 6 +- .../engines/changes/cleanup_portions.cpp | 10 +- .../engines/changes/compaction.cpp | 17 +- .../engines/changes/general_compaction.h | 14 -- .../tx/columnshard/engines/changes/ttl.cpp | 27 +-- .../engines/portions/data_accessor.cpp | 198 ++++++++++++++++++ .../engines/portions/data_accessor.h | 41 +++- .../engines/portions/portion_info.cpp | 149 ------------- .../engines/portions/portion_info.h | 69 ------ .../engines/portions/read_with_blobs.cpp | 4 +- .../reader/plain_reader/iterator/source.cpp | 4 +- .../reader/plain_reader/iterator/source.h | 4 +- .../columnshard/engines/scheme/index_info.h | 1 + .../columnshard/hooks/testing/controller.cpp | 14 +- .../normalizer/portion/broken_blobs.cpp | 3 +- .../columnshard/normalizer/portion/clean.cpp | 44 ++-- 18 files changed, 340 insertions(+), 307 deletions(-) diff --git a/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.cpp b/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.cpp index cd90a7353322..5d248e1f4d9f 100644 --- a/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.cpp +++ b/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.cpp @@ -1,16 +1,19 @@ #include "transfer.h" -#include + #include +#include #include +#include namespace NKikimr::NOlap::NDataSharing::NEvents { THashMap TPathIdData::BuildLinkTabletTasks( - const std::shared_ptr& storages, const TTabletId selfTabletId, const TTransferContext& context, const TVersionedIndex& index) { + const std::shared_ptr& storages, const TTabletId selfTabletId, const TTransferContext& context, + const TVersionedIndex& index) { THashMap> blobIds; for (auto&& i : Portions) { auto schema = i.GetSchema(index); - i.FillBlobIdsByStorage(blobIds, schema->GetIndexInfo()); + TPortionDataAccessor(i).FillBlobIdsByStorage(blobIds, schema->GetIndexInfo()); } const std::shared_ptr sharedBlobs = storages->GetSharedBlobsManager(); @@ -51,7 +54,9 @@ THashMap storageTabletTasks; for (auto&& [_, blobInfo] : blobs) { - THashMap blobTabletTasks = context.GetMoving() ? blobInfo.BuildTabletTasksOnMove(context, selfTabletId, storageId) : blobInfo.BuildTabletTasksOnCopy(context, selfTabletId, storageId); + THashMap blobTabletTasks = context.GetMoving() + ? blobInfo.BuildTabletTasksOnMove(context, selfTabletId, storageId) + : blobInfo.BuildTabletTasksOnCopy(context, selfTabletId, storageId); for (auto&& [tId, tInfo] : blobTabletTasks) { auto itTablet = storageTabletTasks.find(tId); if (itTablet == storageTabletTasks.end()) { @@ -71,4 +76,4 @@ THashMap #include #include +#include #include namespace NKikimr::NOlap::NDataSharing { -NKikimr::TConclusionStatus TDestinationSession::DataReceived(THashMap&& data, TColumnEngineForLogs& index, const std::shared_ptr& /*manager*/) { +NKikimr::TConclusionStatus TDestinationSession::DataReceived( + THashMap&& data, TColumnEngineForLogs& index, const std::shared_ptr& /*manager*/) { auto guard = index.GranulesStorage->GetStats()->StartPackModification(); for (auto&& i : data) { auto it = PathIds.find(i.first); @@ -66,8 +68,8 @@ void TDestinationSession::SendCurrentCursorAck(const NColumnShard::TColumnShard& AFL_VERIFY(found); } -NKikimr::TConclusion> TDestinationSession::ReceiveData( - NColumnShard::TColumnShard* self, const THashMap& data, const ui32 receivedPackIdx, const TTabletId sourceTabletId, +NKikimr::TConclusion> TDestinationSession::ReceiveData(NColumnShard::TColumnShard* self, + const THashMap& data, const ui32 receivedPackIdx, const TTabletId sourceTabletId, const std::shared_ptr& selfPtr) { auto result = GetCursorVerified(sourceTabletId).ReceiveData(receivedPackIdx); if (!result) { @@ -76,18 +78,21 @@ NKikimr::TConclusion> TDestin return std::unique_ptr(new TTxDataFromSource(self, selfPtr, data, sourceTabletId)); } -NKikimr::TConclusion> TDestinationSession::ReceiveFinished(NColumnShard::TColumnShard* self, const TTabletId sourceTabletId, const std::shared_ptr& selfPtr) { +NKikimr::TConclusion> TDestinationSession::ReceiveFinished( + NColumnShard::TColumnShard* self, const TTabletId sourceTabletId, const std::shared_ptr& selfPtr) { if (GetCursorVerified(sourceTabletId).GetDataFinished()) { return TConclusionStatus::Fail("session finished already"); } return std::unique_ptr(new TTxFinishFromSource(self, sourceTabletId, selfPtr)); } -NKikimr::TConclusion> TDestinationSession::AckInitiatorFinished(NColumnShard::TColumnShard* self, const std::shared_ptr& selfPtr) { +NKikimr::TConclusion> TDestinationSession::AckInitiatorFinished( + NColumnShard::TColumnShard* self, const std::shared_ptr& selfPtr) { return std::unique_ptr(new TTxFinishAckFromInitiator(self, selfPtr)); } -NKikimr::TConclusionStatus TDestinationSession::DeserializeDataFromProto(const NKikimrColumnShardDataSharingProto::TDestinationSession& proto, const TColumnEngineForLogs& index) { +NKikimr::TConclusionStatus TDestinationSession::DeserializeDataFromProto( + const NKikimrColumnShardDataSharingProto::TDestinationSession& proto, const TColumnEngineForLogs& index) { if (!InitiatorController.DeserializeFromProto(proto.GetInitiatorController())) { return TConclusionStatus::Fail("cannot parse initiator controller: " + proto.GetInitiatorController().DebugString()); } @@ -139,7 +144,8 @@ NKikimrColumnShardDataSharingProto::TDestinationSession::TFullCursor TDestinatio return result; } -NKikimr::TConclusionStatus TDestinationSession::DeserializeCursorFromProto(const NKikimrColumnShardDataSharingProto::TDestinationSession::TFullCursor& proto) { +NKikimr::TConclusionStatus TDestinationSession::DeserializeCursorFromProto( + const NKikimrColumnShardDataSharingProto::TDestinationSession::TFullCursor& proto) { ConfirmedFlag = proto.GetConfirmedFlag(); for (auto&& i : proto.GetSourceCursors()) { TSourceCursorForDestination cursor; @@ -154,13 +160,14 @@ NKikimr::TConclusionStatus TDestinationSession::DeserializeCursorFromProto(const return TConclusionStatus::Success(); } -bool TDestinationSession::DoStart(const NColumnShard::TColumnShard& shard, const THashMap>>& portions) { +bool TDestinationSession::DoStart( + const NColumnShard::TColumnShard& shard, const THashMap>>& portions) { AFL_VERIFY(IsConfirmed()); NYDBTest::TControllers::GetColumnShardController()->OnDataSharingStarted(shard.TabletID(), GetSessionId()); THashMap> local; for (auto&& i : portions) { for (auto&& p : i.second) { - p->FillBlobIdsByStorage(local, shard.GetIndexAs().GetVersionedIndex()); + TPortionDataAccessor(*p).FillBlobIdsByStorage(local, shard.GetIndexAs().GetVersionedIndex()); } } std::swap(CurrentBlobIds, local); @@ -170,7 +177,7 @@ bool TDestinationSession::DoStart(const NColumnShard::TColumnShard& shard, const bool TDestinationSession::TryTakePortionBlobs(const TVersionedIndex& vIndex, const TPortionInfo& portion) { THashMap> blobIds; - portion.FillBlobIdsByStorage(blobIds, vIndex); + TPortionDataAccessor(portion).FillBlobIdsByStorage(blobIds, vIndex); ui32 containsCounter = 0; ui32 newCounter = 0; for (auto&& i : blobIds) { diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.cpp b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.cpp index d5c37846be9d..591659b283ec 100644 --- a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.cpp +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.cpp @@ -1,5 +1,7 @@ #include "tx_data_ack_to_source.h" + #include +#include namespace NKikimr::NOlap::NDataSharing { @@ -11,7 +13,7 @@ bool TTxDataAckToSource::DoExecute(NTabletFlatExecutor::TTransactionContext& txc auto& index = Self->GetIndexAs().GetVersionedIndex(); for (auto&& [_, i] : Session->GetCursorVerified()->GetPreviousSelected()) { for (auto&& portion : i.GetPortions()) { - portion.FillBlobIdsByStorage(sharedBlobIds, index); + TPortionDataAccessor(portion).FillBlobIdsByStorage(sharedBlobIds, index); } } for (auto&& i : sharedBlobIds) { @@ -31,4 +33,4 @@ void TTxDataAckToSource::DoComplete(const TActorContext& /*ctx*/) { Session->ActualizeDestination(*Self, Self->GetDataLocksManager()); } -} \ No newline at end of file +} // namespace NKikimr::NOlap::NDataSharing diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp index 7917b77682b9..e5908882e80e 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp @@ -1,8 +1,10 @@ #include "cleanup_portions.h" -#include -#include + #include +#include #include +#include +#include namespace NKikimr::NOlap { @@ -23,7 +25,7 @@ void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TC THashMap> blobIdsByStorage; for (auto&& p : PortionsToDrop) { p.RemoveFromDatabase(context.DBWrapper); - p.FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); + TPortionDataAccessor(p).FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); pathIds.emplace(p.GetPathId()); } for (auto&& i : blobIdsByStorage) { @@ -60,4 +62,4 @@ NColumnShard::ECumulativeCounters TCleanupPortionsColumnEngineChanges::GetCounte return isSuccess ? NColumnShard::COUNTER_CLEANUP_SUCCESS : NColumnShard::COUNTER_CLEANUP_FAIL; } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/changes/compaction.cpp b/ydb/core/tx/columnshard/engines/changes/compaction.cpp index b0eb17e90200..3613898c390f 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction.cpp @@ -1,8 +1,10 @@ #include "compaction.h" + +#include +#include #include +#include #include -#include -#include namespace NKikimr::NOlap { @@ -33,12 +35,12 @@ void TCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { THashMap> blobRanges; auto& index = self.GetIndexAs().GetVersionedIndex(); for (const auto& p : SwitchedPortions) { - p.FillBlobRangesByStorage(blobRanges, index); + TPortionDataAccessor(p).FillBlobRangesByStorage(blobRanges, index); } for (const auto& p : blobRanges) { auto action = BlobsAction.GetReading(p.first); - for (auto&& b: p.second) { + for (auto&& b : p.second) { action->AddRange(b); } } @@ -66,7 +68,8 @@ void TCompactColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, T NeedGranuleStatusProvide = false; } -TCompactColumnEngineChanges::TCompactColumnEngineChanges(std::shared_ptr granule, const std::vector>& portions, const TSaverContext& saverContext) +TCompactColumnEngineChanges::TCompactColumnEngineChanges( + std::shared_ptr granule, const std::vector>& portions, const TSaverContext& saverContext) : TBase(saverContext, NBlobOperations::EConsumer::GENERAL_COMPACTION) , GranuleMeta(granule) { Y_ABORT_UNLESS(GranuleMeta); @@ -78,11 +81,11 @@ TCompactColumnEngineChanges::TCompactColumnEngineChanges(std::shared_ptrGetPathId() == GranuleMeta->GetPathId()); } -// Y_ABORT_UNLESS(SwitchedPortions.size()); + // Y_ABORT_UNLESS(SwitchedPortions.size()); } TCompactColumnEngineChanges::~TCompactColumnEngineChanges() { Y_DEBUG_ABORT_UNLESS(!NActors::TlsActivationContext || !NeedGranuleStatusProvide); } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.h b/ydb/core/tx/columnshard/engines/changes/general_compaction.h index 4e24cbf2967a..9e47ef9ed018 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.h +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.h @@ -47,20 +47,6 @@ class TGeneralCompactColumnEngineChanges: public TCompactColumnEngineChanges { } using TBase::TBase; - class TMemoryPredictorSimplePolicy: public IMemoryPredictor { - private: - ui64 SumMemory = 0; - - public: - virtual ui64 AddPortion(const TPortionInfo& portionInfo) override { - for (auto&& i : portionInfo.GetRecords()) { - SumMemory += i.BlobRange.Size; - SumMemory += 2 * i.GetMeta().GetRawBytes(); - } - return SumMemory; - } - }; - class TMemoryPredictorChunkedPolicy: public IMemoryPredictor { private: ui64 SumMemoryDelta = 0; diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.cpp b/ydb/core/tx/columnshard/engines/changes/ttl.cpp index 9774130b561f..6a9c7b8a22c4 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.cpp +++ b/ydb/core/tx/columnshard/engines/changes/ttl.cpp @@ -1,10 +1,12 @@ #include "ttl.h" -#include + +#include #include -#include -#include #include -#include +#include +#include +#include +#include namespace NKikimr::NOlap { @@ -19,7 +21,7 @@ void TTTLColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { auto& engine = self.MutableIndexAs(); auto& index = engine.GetVersionedIndex(); for (const auto& p : PortionsToEvict) { - p.GetPortionInfo().FillBlobRangesByStorage(blobRanges, index); + TPortionDataAccessor(p.GetPortionInfo()).FillBlobRangesByStorage(blobRanges, index); } for (auto&& i : blobRanges) { auto action = BlobsAction.GetReading(i.first); @@ -45,17 +47,18 @@ void TTTLColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChan } } -std::optional TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, - TConstructionContext& context) const -{ +std::optional TTTLColumnEngineChanges::UpdateEvictedPortion( + TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, TConstructionContext& context) const { const TPortionInfo& portionInfo = info.GetPortionInfo(); auto& evictFeatures = info.GetFeatures(); auto blobSchema = portionInfo.GetSchema(context.SchemaVersions); - Y_ABORT_UNLESS(portionInfo.GetMeta().GetTierName() != evictFeatures.GetTargetTierName() || blobSchema->GetVersion() < evictFeatures.GetTargetScheme()->GetVersion()); + Y_ABORT_UNLESS(portionInfo.GetMeta().GetTierName() != evictFeatures.GetTargetTierName() || + blobSchema->GetVersion() < evictFeatures.GetTargetScheme()->GetVersion()); auto portionWithBlobs = TReadPortionInfoWithBlobs::RestorePortion(portionInfo, srcBlobs, blobSchema->GetIndexInfo()); - std::optional result = TReadPortionInfoWithBlobs::SyncPortion( - std::move(portionWithBlobs), blobSchema, evictFeatures.GetTargetScheme(), evictFeatures.GetTargetTierName(), SaverContext.GetStoragesManager(), context.Counters.SplitterCounters); + std::optional result = + TReadPortionInfoWithBlobs::SyncPortion(std::move(portionWithBlobs), blobSchema, evictFeatures.GetTargetScheme(), + evictFeatures.GetTargetTierName(), SaverContext.GetStoragesManager(), context.Counters.SplitterCounters); return std::move(result); } @@ -78,4 +81,4 @@ NColumnShard::ECumulativeCounters TTTLColumnEngineChanges::GetCounterIndex(const return isSuccess ? NColumnShard::COUNTER_TTL_SUCCESS : NColumnShard::COUNTER_TTL_FAIL; } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp index a440b06efa48..bf0f8e5eaead 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp @@ -2,6 +2,10 @@ #include #include +#include +#include +#include +#include #include #include @@ -101,6 +105,200 @@ TPortionDataAccessor::TPreparedBatchData TPortionDataAccessor::PrepareForAssembl return PrepareForAssembleImpl(*this, *PortionInfo, dataSchema, resultSchema, blobsData, defaultSnapshot); } +void TPortionDataAccessor::FillBlobRangesByStorage(THashMap>& result, const TVersionedIndex& index) const { + auto schema = PortionInfo->GetSchema(index); + return FillBlobRangesByStorage(result, schema->GetIndexInfo()); +} + +void TPortionDataAccessor::FillBlobRangesByStorage(THashMap>& result, const TIndexInfo& indexInfo) const { + for (auto&& i : PortionInfo->Records) { + const TString& storageId = PortionInfo->GetColumnStorageId(i.GetColumnId(), indexInfo); + AFL_VERIFY(result[storageId].emplace(PortionInfo->RestoreBlobRange(i.GetBlobRange())).second)( + "blob_id", PortionInfo->RestoreBlobRange(i.GetBlobRange()).ToString()); + } + for (auto&& i : PortionInfo->Indexes) { + const TString& storageId = PortionInfo->GetIndexStorageId(i.GetIndexId(), indexInfo); + if (auto bRange = i.GetBlobRangeOptional()) { + AFL_VERIFY(result[storageId].emplace(PortionInfo->RestoreBlobRange(*bRange)).second)( + "blob_id", PortionInfo->RestoreBlobRange(*bRange).ToString()); + } + } +} + +void TPortionDataAccessor::FillBlobIdsByStorage(THashMap>& result, const TIndexInfo& indexInfo) const { + THashMap> local; + THashSet* currentHashLocal = nullptr; + THashSet* currentHashResult = nullptr; + std::optional lastEntityId; + TString lastStorageId; + ui32 lastBlobIdx = PortionInfo->BlobIds.size(); + for (auto&& i : PortionInfo->Records) { + if (!lastEntityId || *lastEntityId != i.GetEntityId()) { + const TString& storageId = PortionInfo->GetColumnStorageId(i.GetEntityId(), indexInfo); + lastEntityId = i.GetEntityId(); + if (storageId != lastStorageId) { + currentHashResult = &result[storageId]; + currentHashLocal = &local[storageId]; + lastStorageId = storageId; + lastBlobIdx = PortionInfo->BlobIds.size(); + } + } + if (lastBlobIdx != i.GetBlobRange().GetBlobIdxVerified() && currentHashLocal->emplace(i.GetBlobRange().GetBlobIdxVerified()).second) { + auto blobId = PortionInfo->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); + AFL_VERIFY(currentHashResult); + AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); + lastBlobIdx = i.GetBlobRange().GetBlobIdxVerified(); + } + } + for (auto&& i : PortionInfo->Indexes) { + if (!lastEntityId || *lastEntityId != i.GetEntityId()) { + const TString& storageId = PortionInfo->GetIndexStorageId(i.GetEntityId(), indexInfo); + lastEntityId = i.GetEntityId(); + if (storageId != lastStorageId) { + currentHashResult = &result[storageId]; + currentHashLocal = &local[storageId]; + lastStorageId = storageId; + lastBlobIdx = PortionInfo->BlobIds.size(); + } + } + if (auto bRange = i.GetBlobRangeOptional()) { + if (lastBlobIdx != bRange->GetBlobIdxVerified() && currentHashLocal->emplace(bRange->GetBlobIdxVerified()).second) { + auto blobId = PortionInfo->GetBlobId(bRange->GetBlobIdxVerified()); + AFL_VERIFY(currentHashResult); + AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); + lastBlobIdx = bRange->GetBlobIdxVerified(); + } + } + } +} + +void TPortionDataAccessor::FillBlobIdsByStorage(THashMap>& result, const TVersionedIndex& index) const { + auto schema = PortionInfo->GetSchema(index); + return FillBlobIdsByStorage(result, schema->GetIndexInfo()); +} + +THashMap>> +TPortionDataAccessor::RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { + THashMap>> result; + for (auto&& c : PortionInfo->Records) { + const TString& storageId = PortionInfo->GetColumnStorageId(c.GetColumnId(), indexInfo); + auto chunk = std::make_shared( + blobs.Extract(storageId, PortionInfo->RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId())); + chunk->SetChunkIdx(c.GetChunkIdx()); + AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); + } + for (auto&& c : PortionInfo->Indexes) { + const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); + const TString blobData = [&]() -> TString { + if (auto bRange = c.GetBlobRangeOptional()) { + return blobs.Extract(storageId, PortionInfo->RestoreBlobRange(*bRange)); + } else if (auto data = c.GetBlobDataOptional()) { + return *data; + } else { + AFL_VERIFY(false); + Y_UNREACHABLE(); + } + }(); + auto chunk = std::make_shared(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), blobData); + chunk->SetChunkIdx(c.GetChunkIdx()); + + AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); + } + return result; +} + +THashMap TPortionDataAccessor::DecodeBlobAddresses( + NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const { + THashMap result; + for (auto&& i : blobs) { + for (auto&& b : i.second) { + bool found = false; + TString columnStorageId; + ui32 columnId = 0; + for (auto&& record : PortionInfo->Records) { + if (PortionInfo->RestoreBlobRange(record.GetBlobRange()) == b.first) { + if (columnId != record.GetColumnId()) { + columnStorageId = PortionInfo->GetColumnStorageId(record.GetColumnId(), indexInfo); + } + if (columnStorageId != i.first) { + continue; + } + result.emplace(record.GetAddress(), std::move(b.second)); + found = true; + break; + } + } + if (found) { + continue; + } + for (auto&& record : PortionInfo->Indexes) { + if (!record.HasBlobRange()) { + continue; + } + if (PortionInfo->RestoreBlobRange(record.GetBlobRangeVerified()) == b.first) { + if (columnId != record.GetIndexId()) { + columnStorageId = indexInfo.GetIndexStorageId(record.GetIndexId()); + } + if (columnStorageId != i.first) { + continue; + } + result.emplace(record.GetAddress(), std::move(b.second)); + found = true; + break; + } + } + AFL_VERIFY(found)("blobs", blobs.DebugString())("records", DebugString())("problem", b.first); + } + } + return result; +} + +bool TPortionDataAccessor::HasEntityAddress(const TChunkAddress& address) const { + { + auto it = std::lower_bound( + PortionInfo->Records.begin(), PortionInfo->Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != PortionInfo->Records.end() && it->GetAddress() == address) { + return true; + } + } + { + auto it = std::lower_bound( + PortionInfo->Indexes.begin(), PortionInfo->Indexes.end(), address, [](const TIndexChunk& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != PortionInfo->Indexes.end() && it->GetAddress() == address) { + return true; + } + } + return false; +} + +const NKikimr::NOlap::TColumnRecord* TPortionDataAccessor::GetRecordPointer(const TChunkAddress& address) const { + auto it = std::lower_bound( + PortionInfo->Records.begin(), PortionInfo->Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != PortionInfo->Records.end() && it->GetAddress() == address) { + return &*it; + } + return nullptr; +} + +TString TPortionDataAccessor::DebugString() const { + TStringBuilder sb; + sb << "chunks:(" << PortionInfo->Records.size() << ");"; + if (IS_TRACE_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { + std::vector blobRanges; + for (auto&& i : PortionInfo->Records) { + blobRanges.emplace_back(PortionInfo->RestoreBlobRange(i.BlobRange)); + } + sb << "blobs:" << JoinSeq(",", blobRanges) << ";ranges_count:" << blobRanges.size() << ";"; + } + return sb << ")"; +} + TConclusion> TPortionDataAccessor::TPreparedColumn::AssembleAccessor() const { Y_ABORT_UNLESS(!Blobs.empty()); diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.h b/ydb/core/tx/columnshard/engines/portions/data_accessor.h index 3487b0f2c1f3..6a675ae125d7 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.h +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.h @@ -9,21 +9,56 @@ namespace NKikimr::NOlap { +namespace NBlobOperations::NRead { +class TCompositeReadBlobs; +} + class TPortionDataAccessor { private: const TPortionInfo* PortionInfo; public: TPortionDataAccessor(const TPortionInfo& portionInfo) - : PortionInfo(&portionInfo) - { - + : PortionInfo(&portionInfo) { } const std::vector& GetRecords() const { return PortionInfo->Records; } + void FillBlobRangesByStorage(THashMap>& result, const TIndexInfo& indexInfo) const; + void FillBlobRangesByStorage(THashMap>& result, const TVersionedIndex& index) const; + void FillBlobIdsByStorage(THashMap>& result, const TIndexInfo& indexInfo) const; + void FillBlobIdsByStorage(THashMap>& result, const TVersionedIndex& index) const; + + THashMap>> RestoreEntityChunks( + NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; + + THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const; + + THashMap> GetBlobIdsByStorage(const TIndexInfo& indexInfo) const { + THashMap> result; + FillBlobIdsByStorage(result, indexInfo); + return result; + } + + const TColumnRecord* GetRecordPointer(const TChunkAddress& address) const; + + bool HasEntityAddress(const TChunkAddress& address) const; + + bool HasIndexes(const std::set& ids) const { + auto idsCopy = ids; + for (auto&& i : PortionInfo->Indexes) { + idsCopy.erase(i.GetIndexId()); + if (idsCopy.empty()) { + return true; + } + } + return false; + } + + TString DebugString() const; + class TAssembleBlobInfo { private: YDB_READONLY_DEF(std::optional, ExpectedRowsCount); diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 85d170b32cac..410196fb8c07 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -2,11 +2,9 @@ #include "constructor.h" #include "portion_info.h" -#include #include #include #include -#include #include namespace NKikimr::NOlap { @@ -285,52 +283,6 @@ TConclusion TPortionInfo::BuildFromProto( return result; } -THashMap TPortionInfo::DecodeBlobAddresses( - NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const { - THashMap result; - for (auto&& i : blobs) { - for (auto&& b : i.second) { - bool found = false; - TString columnStorageId; - ui32 columnId = 0; - for (auto&& record : Records) { - if (RestoreBlobRange(record.GetBlobRange()) == b.first) { - if (columnId != record.GetColumnId()) { - columnStorageId = GetColumnStorageId(record.GetColumnId(), indexInfo); - } - if (columnStorageId != i.first) { - continue; - } - result.emplace(record.GetAddress(), std::move(b.second)); - found = true; - break; - } - } - if (found) { - continue; - } - for (auto&& record : Indexes) { - if (!record.HasBlobRange()) { - continue; - } - if (RestoreBlobRange(record.GetBlobRangeVerified()) == b.first) { - if (columnId != record.GetIndexId()) { - columnStorageId = indexInfo.GetIndexStorageId(record.GetIndexId()); - } - if (columnStorageId != i.first) { - continue; - } - result.emplace(record.GetAddress(), std::move(b.second)); - found = true; - break; - } - } - AFL_VERIFY(found)("blobs", blobs.DebugString())("records", DebugString(true))("problem", b.first); - } - } - return result; -} - const TString& TPortionInfo::GetColumnStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const { if (HasInsertWriteId()) { return { NBlobOperations::TGlobal::DefaultStorageId }; @@ -362,107 +314,6 @@ ISnapshotSchema::TPtr TPortionInfo::GetSchema(const TVersionedIndex& index) cons return index.GetSchema(MinSnapshotDeprecated); } -void TPortionInfo::FillBlobRangesByStorage(THashMap>& result, const TIndexInfo& indexInfo) const { - for (auto&& i : Records) { - const TString& storageId = GetColumnStorageId(i.GetColumnId(), indexInfo); - AFL_VERIFY(result[storageId].emplace(RestoreBlobRange(i.GetBlobRange())).second)( - "blob_id", RestoreBlobRange(i.GetBlobRange()).ToString()); - } - for (auto&& i : Indexes) { - const TString& storageId = GetIndexStorageId(i.GetIndexId(), indexInfo); - if (auto bRange = i.GetBlobRangeOptional()) { - AFL_VERIFY(result[storageId].emplace(RestoreBlobRange(*bRange)).second)("blob_id", RestoreBlobRange(*bRange).ToString()); - } - } -} - -void TPortionInfo::FillBlobRangesByStorage(THashMap>& result, const TVersionedIndex& index) const { - auto schema = GetSchema(index); - return FillBlobRangesByStorage(result, schema->GetIndexInfo()); -} - -void TPortionInfo::FillBlobIdsByStorage(THashMap>& result, const TIndexInfo& indexInfo) const { - THashMap> local; - THashSet* currentHashLocal = nullptr; - THashSet* currentHashResult = nullptr; - std::optional lastEntityId; - TString lastStorageId; - ui32 lastBlobIdx = BlobIds.size(); - for (auto&& i : Records) { - if (!lastEntityId || *lastEntityId != i.GetEntityId()) { - const TString& storageId = GetColumnStorageId(i.GetEntityId(), indexInfo); - lastEntityId = i.GetEntityId(); - if (storageId != lastStorageId) { - currentHashResult = &result[storageId]; - currentHashLocal = &local[storageId]; - lastStorageId = storageId; - lastBlobIdx = BlobIds.size(); - } - } - if (lastBlobIdx != i.GetBlobRange().GetBlobIdxVerified() && currentHashLocal->emplace(i.GetBlobRange().GetBlobIdxVerified()).second) { - auto blobId = GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - AFL_VERIFY(currentHashResult); - AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); - lastBlobIdx = i.GetBlobRange().GetBlobIdxVerified(); - } - } - for (auto&& i : Indexes) { - if (!lastEntityId || *lastEntityId != i.GetEntityId()) { - const TString& storageId = GetIndexStorageId(i.GetEntityId(), indexInfo); - lastEntityId = i.GetEntityId(); - if (storageId != lastStorageId) { - currentHashResult = &result[storageId]; - currentHashLocal = &local[storageId]; - lastStorageId = storageId; - lastBlobIdx = BlobIds.size(); - } - } - if (auto bRange = i.GetBlobRangeOptional()) { - if (lastBlobIdx != bRange->GetBlobIdxVerified() && currentHashLocal->emplace(bRange->GetBlobIdxVerified()).second) { - auto blobId = GetBlobId(bRange->GetBlobIdxVerified()); - AFL_VERIFY(currentHashResult); - AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); - lastBlobIdx = bRange->GetBlobIdxVerified(); - } - } - } -} - -void TPortionInfo::FillBlobIdsByStorage(THashMap>& result, const TVersionedIndex& index) const { - auto schema = GetSchema(index); - return FillBlobIdsByStorage(result, schema->GetIndexInfo()); -} - -THashMap>> TPortionInfo::RestoreEntityChunks( - NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { - THashMap>> result; - for (auto&& c : GetRecords()) { - const TString& storageId = GetColumnStorageId(c.GetColumnId(), indexInfo); - auto chunk = std::make_shared( - blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId())); - chunk->SetChunkIdx(c.GetChunkIdx()); - AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); - } - for (auto&& c : GetIndexes()) { - const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); - const TString blobData = [&]() -> TString { - if (auto bRange = c.GetBlobRangeOptional()) { - return blobs.Extract(storageId, RestoreBlobRange(*bRange)); - } else if (auto data = c.GetBlobDataOptional()) { - return *data; - } else { - AFL_VERIFY(false); - Y_UNREACHABLE(); - } - }(); - auto chunk = std::make_shared(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), blobData); - chunk->SetChunkIdx(c.GetChunkIdx()); - - AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); - } - return result; -} - void TPortionInfo::ReorderChunks() { { auto pred = [](const TColumnRecord& l, const TColumnRecord& r) { diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index d275b64c9716..29942b314aec 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -244,23 +244,8 @@ class TPortionInfo { } void FullValidation() const; - - bool HasIndexes(const std::set& ids) const { - auto idsCopy = ids; - for (auto&& i : Indexes) { - idsCopy.erase(i.GetIndexId()); - if (idsCopy.empty()) { - return true; - } - } - return false; - } - void ReorderChunks(); - THashMap>> RestoreEntityChunks( - NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; - const TBlobRange RestoreBlobRange(const TBlobRangeLink16& linkRange) const { return linkRange.RestoreRange(GetBlobId(linkRange.GetBlobIdxVerified())); } @@ -274,8 +259,6 @@ class TPortionInfo { return BlobIds.size(); } - THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const; - const TString& GetColumnStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const; const TString& GetIndexStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const; const TString& GetEntityStorageId(const ui32 entityId, const TIndexInfo& indexInfo) const; @@ -394,36 +377,6 @@ class TPortionInfo { return Meta; } - const TColumnRecord* GetRecordPointer(const TChunkAddress& address) const { - auto it = std::lower_bound(Records.begin(), Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { - return item.GetAddress() < address; - }); - if (it != Records.end() && it->GetAddress() == address) { - return &*it; - } - return nullptr; - } - - bool HasEntityAddress(const TChunkAddress& address) const { - { - auto it = std::lower_bound(Records.begin(), Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { - return item.GetAddress() < address; - }); - if (it != Records.end() && it->GetAddress() == address) { - return true; - } - } - { - auto it = std::lower_bound(Indexes.begin(), Indexes.end(), address, [](const TIndexChunk& item, const TChunkAddress& address) { - return item.GetAddress() < address; - }); - if (it != Indexes.end() && it->GetAddress() == address) { - return true; - } - } - return false; - } - bool ValidSnapshotInfo() const { return MinSnapshotDeprecated.Valid() && PathId && PortionId; } @@ -538,12 +491,6 @@ class TPortionInfo { } } - THashMap> GetBlobIdsByStorage(const TIndexInfo& indexInfo) const { - THashMap> result; - FillBlobIdsByStorage(result, indexInfo); - return result; - } - class TSchemaCursor { const NOlap::TVersionedIndex& VersionedIndex; ISnapshotSchema::TPtr CurrentSchema; @@ -568,12 +515,6 @@ class TPortionInfo { ISnapshotSchema::TPtr GetSchema(const TVersionedIndex& index) const; - void FillBlobRangesByStorage(THashMap>& result, const TIndexInfo& indexInfo) const; - void FillBlobRangesByStorage(THashMap>& result, const TVersionedIndex& index) const; - - void FillBlobIdsByStorage(THashMap>& result, const TIndexInfo& indexInfo) const; - void FillBlobIdsByStorage(THashMap>& result, const TVersionedIndex& index) const; - ui32 GetRecordsCount() const { ui32 result = 0; std::optional columnIdFirst; @@ -592,16 +533,6 @@ class TPortionInfo { return GetRecordsCount(); } - ui32 NumRows(const ui32 columnId) const { - ui32 result = 0; - for (auto&& i : Records) { - if (columnId == i.ColumnId) { - result += i.GetMeta().GetNumRows(); - } - } - return result; - } - ui64 GetIndexRawBytes(const std::set& columnIds, const bool validation = true) const; ui64 GetIndexRawBytes(const bool validation = true) const; ui64 GetIndexBlobBytes() const noexcept { diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index d27f12498951..1723bc2fbb51 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -11,7 +11,7 @@ namespace NKikimr::NOlap { void TReadPortionInfoWithBlobs::RestoreChunk(const std::shared_ptr& chunk) { auto address = chunk->GetChunkAddressVerified(); - AFL_VERIFY(GetPortionInfo().HasEntityAddress(address))("address", address.DebugString()); + AFL_VERIFY(TPortionDataAccessor(PortionInfo).HasEntityAddress(address))("address", address.DebugString()); AFL_VERIFY(Chunks.emplace(address, chunk).second)("address", address.DebugString()); } @@ -29,7 +29,7 @@ NKikimr::NOlap::TReadPortionInfoWithBlobs TReadPortionInfoWithBlobs::RestorePort const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) { TReadPortionInfoWithBlobs result(portion); THashMap>> records = - result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); + TPortionDataAccessor(result.PortionInfo).RestoreEntityChunks(blobs, indexInfo); for (auto&& [storageId, chunksByAddress] : records) { for (auto&& [_, chunk] : chunksByAddress) { result.RestoreChunk(chunk); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index d282f0853dea..c3cb0ea27b84 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -63,7 +63,7 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob if (columnChunks.empty()) { continue; } - auto itFilter = cFilter.GetIterator(false, Portion->NumRows(i)); + auto itFilter = cFilter.GetIterator(false, Portion->NumRows()); bool itFinished = false; for (auto&& c : columnChunks) { AFL_VERIFY(!itFinished); @@ -79,7 +79,7 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob } itFinished = !itFilter.Next(c->GetMeta().GetNumRows()); } - AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->NumRows(i)); + AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->NumRows()); } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks)( "reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index 893f143a4617..c64b9a0cc3ca 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -327,11 +327,11 @@ class TPortionDataSource: public IDataSource { } virtual bool HasIndexes(const std::set& indexIds) const override { - return Portion->HasIndexes(indexIds); + return TPortionDataAccessor(*Portion).HasIndexes(indexIds); } virtual THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobsOriginal) const override { - return Portion->DecodeBlobAddresses(std::move(blobsOriginal), Schema->GetIndexInfo()); + return TPortionDataAccessor(*Portion).DecodeBlobAddresses(std::move(blobsOriginal), Schema->GetIndexInfo()); } virtual bool IsSourceInMemory(const std::set& fieldIds) const override { diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index e46e2dac8779..fcea496b720f 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -50,6 +50,7 @@ struct TIndexInfo: public IIndexInfo { private: using TColumns = THashMap; friend class TPortionInfo; + friend class TPortionDataAccessor; class TNameInfo { private: diff --git a/ydb/core/tx/columnshard/hooks/testing/controller.cpp b/ydb/core/tx/columnshard/hooks/testing/controller.cpp index 9cf3a7e7e9b5..c2028b4ff4fe 100644 --- a/ydb/core/tx/columnshard/hooks/testing/controller.cpp +++ b/ydb/core/tx/columnshard/hooks/testing/controller.cpp @@ -1,11 +1,14 @@ #include "controller.h" -#include + #include -#include +#include #include #include #include +#include #include +#include + #include namespace NKikimr::NYDBTest::NColumnShard { @@ -31,7 +34,7 @@ void TController::CheckInvariants(const ::NKikimr::NColumnShard::TColumnShard& s THashMap> ids; for (auto&& i : granules) { for (auto&& p : i->GetPortions()) { - p.second->FillBlobIdsByStorage(ids, index.GetVersionedIndex()); + NOlap::TPortionDataAccessor(*p.second).FillBlobIdsByStorage(ids, index.GetVersionedIndex()); } } for (auto&& i : ids) { @@ -118,7 +121,8 @@ bool TController::IsTrivialLinks() const { return true; } -::NKikimr::NColumnShard::TBlobPutResult::TPtr TController::OverrideBlobPutResultOnCompaction(const ::NKikimr::NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& actions) const { +::NKikimr::NColumnShard::TBlobPutResult::TPtr TController::OverrideBlobPutResultOnCompaction( + const ::NKikimr::NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& actions) const { if (IndexWriteControllerEnabled) { return original; } @@ -138,4 +142,4 @@ ::NKikimr::NColumnShard::TBlobPutResult::TPtr TController::OverrideBlobPutResult return result; } -} +} // namespace NKikimr::NYDBTest::NColumnShard diff --git a/ydb/core/tx/columnshard/normalizer/portion/broken_blobs.cpp b/ydb/core/tx/columnshard/normalizer/portion/broken_blobs.cpp index 699cd0bebc66..afb9f7541049 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/broken_blobs.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/broken_blobs.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -163,7 +164,7 @@ INormalizerTask::TPtr TNormalizer::BuildTask( for (auto&& portion : portions) { auto schemaPtr = schemas->FindPtr(portion->GetPortionId()); THashMap> blobsByStorage; - portion->FillBlobRangesByStorage(blobsByStorage, schemaPtr->get()->GetIndexInfo()); + TPortionDataAccessor(*portion).FillBlobRangesByStorage(blobsByStorage, schemaPtr->get()->GetIndexInfo()); if (blobsByStorage.size() > 1 || !blobsByStorage.contains(NBlobOperations::TGlobal::DefaultStorageId)) { continue; } diff --git a/ydb/core/tx/columnshard/normalizer/portion/clean.cpp b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp index d1e00669f8b3..e7f0cde53e8c 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/clean.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp @@ -1,23 +1,23 @@ #include "clean.h" -#include -#include +#include #include +#include +#include +#include #include -#include - - namespace NKikimr::NOlap { -class TBlobsRemovingResult : public INormalizerChanges { +class TBlobsRemovingResult: public INormalizerChanges { std::shared_ptr RemovingAction; std::vector> Portions; + public: TBlobsRemovingResult(std::shared_ptr removingAction, std::vector>&& portions) : RemovingAction(removingAction) - , Portions(std::move(portions)) - {} + , Portions(std::move(portions)) { + } bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /* normController */) const override { NOlap::TBlobManagerDb blobManagerDb(txc.DB); @@ -25,7 +25,8 @@ class TBlobsRemovingResult : public INormalizerChanges { TDbWrapper db(txc.DB, nullptr); for (auto&& portion : Portions) { - AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("message", "remove lost portion")("path_id", portion->GetPathId())("portion_id", portion->GetPortionId()); + AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("message", "remove lost portion")("path_id", portion->GetPathId())( + "portion_id", portion->GetPortionId()); portion->RemoveFromDatabase(db); } return true; @@ -40,36 +41,40 @@ class TBlobsRemovingResult : public INormalizerChanges { } }; -class TBlobsRemovingTask : public INormalizerTask { +class TBlobsRemovingTask: public INormalizerTask { std::vector Blobs; std::vector> Portions; + public: TBlobsRemovingTask(std::vector&& blobs, std::vector>&& portions) : Blobs(std::move(blobs)) - , Portions(std::move(portions)) - {} + , Portions(std::move(portions)) { + } void Start(const TNormalizationController& controller, const TNormalizationContext& nCtx) override { controller.GetCounters().CountObjects(Blobs.size()); - auto removeAction = controller.GetStoragesManager()->GetDefaultOperator()->StartDeclareRemovingAction(NBlobOperations::EConsumer::NORMALIZER); + auto removeAction = + controller.GetStoragesManager()->GetDefaultOperator()->StartDeclareRemovingAction(NBlobOperations::EConsumer::NORMALIZER); for (auto&& blobId : Blobs) { removeAction->DeclareSelfRemove(blobId); } - TActorContext::AsActorContext().Send(nCtx.GetShardActor(), std::make_unique(std::make_shared(removeAction, std::move(Portions)))); + TActorContext::AsActorContext().Send( + nCtx.GetShardActor(), std::make_unique( + std::make_shared(removeAction, std::move(Portions)))); } }; - bool TCleanPortionsNormalizer::CheckPortion(const NColumnShard::TTablesManager& tablesManager, const TPortionInfo& portionInfo) const { return tablesManager.HasTable(portionInfo.GetAddress().GetPathId(), true); } -INormalizerTask::TPtr TCleanPortionsNormalizer::BuildTask(std::vector>&& portions, std::shared_ptr> schemas) const { +INormalizerTask::TPtr TCleanPortionsNormalizer::BuildTask( + std::vector>&& portions, std::shared_ptr> schemas) const { std::vector blobIds; THashMap> blobsByStorage; for (auto&& portion : portions) { auto schemaPtr = schemas->FindPtr(portion->GetPortionId()); - portion->FillBlobIdsByStorage(blobsByStorage, schemaPtr->get()->GetIndexInfo()); + TPortionDataAccessor(*portion).FillBlobIdsByStorage(blobsByStorage, schemaPtr->get()->GetIndexInfo()); } for (auto&& [storageId, blobs] : blobsByStorage) { if (storageId == NBlobOperations::TGlobal::DefaultStorageId) { @@ -84,9 +89,8 @@ INormalizerTask::TPtr TCleanPortionsNormalizer::BuildTask(std::vector(std::move(blobIds), std::move(portions)); } - TConclusion TCleanPortionsNormalizer::DoInitImpl(const TNormalizationController&, NTabletFlatExecutor::TTransactionContext&) { +TConclusion TCleanPortionsNormalizer::DoInitImpl(const TNormalizationController&, NTabletFlatExecutor::TTransactionContext&) { return true; } - -} +} // namespace NKikimr::NOlap From 22b60ed85a850f326969635f46a50c9ce7bf8522 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 16:06:20 +0300 Subject: [PATCH 18/31] corrections --- ydb/core/tx/columnshard/counters/portions.cpp | 8 +- .../engines/changes/cleanup_portions.cpp | 2 +- .../tx/columnshard/engines/column_engine.cpp | 2 +- .../engines/column_engine_logs.cpp | 2 +- .../columnshard/engines/insert_table/meta.h | 4 +- .../engines/portions/column_record.cpp | 4 +- .../engines/portions/column_record.h | 10 +- .../engines/portions/data_accessor.cpp | 197 ++++++++++++++++- .../engines/portions/data_accessor.h | 84 +++++++- .../engines/portions/portion_info.cpp | 204 +----------------- .../engines/portions/portion_info.h | 95 ++------ .../engines/portions/read_with_blobs.cpp | 2 +- .../reader/plain_reader/iterator/source.cpp | 10 +- .../reader/plain_reader/iterator/source.h | 13 +- .../reader/sys_view/portions/portions.cpp | 2 +- .../storage/actualizer/counters/counters.h | 4 +- .../engines/storage/granule/granule.h | 4 +- .../optimizer/lbuckets/planner/optimizer.h | 4 +- .../optimizer/sbuckets/common/optimizer.h | 4 +- .../optimizer/sbuckets/counters/counters.h | 4 +- .../columnshard/normalizer/portion/chunks.h | 2 +- .../splitter/abstract/chunk_meta.cpp | 2 +- .../splitter/abstract/chunk_meta.h | 7 +- 23 files changed, 337 insertions(+), 333 deletions(-) diff --git a/ydb/core/tx/columnshard/counters/portions.cpp b/ydb/core/tx/columnshard/counters/portions.cpp index c3e0c6dbf071..b32189171294 100644 --- a/ydb/core/tx/columnshard/counters/portions.cpp +++ b/ydb/core/tx/columnshard/counters/portions.cpp @@ -4,14 +4,14 @@ namespace NKikimr::NColumnShard { void TPortionCategoryCounters::AddPortion(const std::shared_ptr& p) { - RecordsCount->Add(p->NumRows()); + RecordsCount->Add(p->GetRecordsCount()); Count->Add(1); BlobBytes->Add(p->GetTotalBlobBytes()); RawBytes->Add(p->GetTotalRawBytes()); } void TPortionCategoryCounters::RemovePortion(const std::shared_ptr& p) { - RecordsCount->Remove(p->NumRows()); + RecordsCount->Remove(p->GetRecordsCount()); Count->Remove(1); BlobBytes->Remove(p->GetTotalBlobBytes()); RawBytes->Remove(p->GetTotalRawBytes()); @@ -29,7 +29,7 @@ void TSimplePortionsGroupInfo::AddPortion(const TPortionInfo& p) { BlobBytes += p.GetTotalBlobBytes(); RawBytes += p.GetTotalRawBytes(); Count += 1; - RecordsCount += p.NumRows(); + RecordsCount += p.GetRecordsCount(); ChunksCount += p.GetChunksCount(); } @@ -41,7 +41,7 @@ void TSimplePortionsGroupInfo::RemovePortion(const TPortionInfo& p) { BlobBytes -= p.GetTotalBlobBytes(); RawBytes -= p.GetTotalRawBytes(); Count -= 1; - RecordsCount -= p.NumRows(); + RecordsCount -= p.GetRecordsCount(); ChunksCount -= p.GetChunksCount(); AFL_VERIFY(RawBytes >= 0); AFL_VERIFY(BlobBytes >= 0); diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp index e5908882e80e..0c1e5fb77548 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp @@ -45,7 +45,7 @@ void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::T if (self) { self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); for (auto&& p : PortionsToDrop) { - self->Counters.GetTabletCounters()->OnDropPortionEvent(p.GetTotalRawBytes(), p.GetTotalBlobBytes(), p.NumRows()); + self->Counters.GetTabletCounters()->OnDropPortionEvent(p.GetTotalRawBytes(), p.GetTotalBlobBytes(), p.GetRecordsCount()); } } } diff --git a/ydb/core/tx/columnshard/engines/column_engine.cpp b/ydb/core/tx/columnshard/engines/column_engine.cpp index 334d77921907..075409c88af2 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine.cpp @@ -40,7 +40,7 @@ TSelectInfo::TStats TSelectInfo::Stats() const { THashSet uniqBlob; for (auto& portionInfo : PortionsOrderedPK) { out.Records += portionInfo->NumChunks(); - out.Rows += portionInfo->NumRows(); + out.Rows += portionInfo->GetRecordsCount(); for (auto& blobId : portionInfo->GetBlobIds()) { out.Bytes += blobId.BlobSize(); } diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index d3943c415f90..7f54b6fb70f6 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -81,7 +81,7 @@ void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, E TColumnEngineStats::TPortionsStats DeltaStats(const TPortionInfo& portionInfo) { TColumnEngineStats::TPortionsStats deltaStats; deltaStats.Bytes = 0; - deltaStats.Rows = portionInfo.NumRows(); + deltaStats.Rows = portionInfo.GetRecordsCount(); deltaStats.Bytes = portionInfo.GetTotalBlobBytes(); deltaStats.RawBytes = portionInfo.GetTotalRawBytes(); deltaStats.Blobs = portionInfo.GetBlobIdsCount(); diff --git a/ydb/core/tx/columnshard/engines/insert_table/meta.h b/ydb/core/tx/columnshard/engines/insert_table/meta.h index 253638853159..a7121e46d32f 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/meta.h +++ b/ydb/core/tx/columnshard/engines/insert_table/meta.h @@ -12,7 +12,7 @@ namespace NKikimr::NOlap { class TInsertedDataMeta { private: YDB_READONLY_DEF(TInstant, DirtyWriteTime); - YDB_READONLY(ui32, NumRows, 0); + YDB_READONLY(ui32, RecordsCount, 0); YDB_READONLY(ui64, RawBytes, 0); YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); @@ -34,7 +34,7 @@ class TInsertedDataMeta { { AFL_VERIFY(proto.HasDirtyWriteTimeSeconds())("data", proto.DebugString()); DirtyWriteTime = TInstant::Seconds(proto.GetDirtyWriteTimeSeconds()); - NumRows = proto.GetNumRows(); + RecordsCount = proto.GetNumRows(); RawBytes = proto.GetRawBytes(); if (proto.HasModificationType()) { ModificationType = TEnumOperator::DeserializeFromProto(proto.GetModificationType()); diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.cpp b/ydb/core/tx/columnshard/engines/portions/column_record.cpp index 26c591be64c3..3e8cc0b9db81 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.cpp +++ b/ydb/core/tx/columnshard/engines/portions/column_record.cpp @@ -10,7 +10,7 @@ namespace NKikimr::NOlap { TConclusionStatus TChunkMeta::DeserializeFromProto(const NKikimrTxColumnShard::TIndexColumnMeta& proto) { if (proto.HasNumRows()) { - NumRows = proto.GetNumRows(); + RecordsCount = proto.GetNumRows(); } if (proto.HasRawBytes()) { RawBytes = proto.GetRawBytes(); @@ -28,7 +28,7 @@ TChunkMeta::TChunkMeta(const std::shared_ptr& NKikimrTxColumnShard::TIndexColumnMeta TChunkMeta::SerializeToProto() const { NKikimrTxColumnShard::TIndexColumnMeta meta; - meta.SetNumRows(NumRows); + meta.SetNumRows(RecordsCount); meta.SetRawBytes(RawBytes); return meta; } diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.h b/ydb/core/tx/columnshard/engines/portions/column_record.h index 5ddc7990b726..7e873fb0b420 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.h +++ b/ydb/core/tx/columnshard/engines/portions/column_record.h @@ -51,9 +51,9 @@ struct TChunkMeta: public TSimpleChunkMeta { class TTestInstanceBuilder { public: - static TChunkMeta Build(const ui64 numRows, const ui64 rawBytes) { + static TChunkMeta Build(const ui64 recordsCount, const ui64 rawBytes) { TChunkMeta result; - result.NumRows = numRows; + result.RecordsCount = recordsCount; result.RawBytes = rawBytes; return result; } @@ -101,8 +101,8 @@ class TColumnRecord { class TTestInstanceBuilder { public: - static TColumnRecord Build(const ui32 columnId, const ui16 chunkId, const ui64 offset, const ui64 size, const ui64 numRows, const ui64 rawBytes) { - TColumnRecord result(TChunkMeta::TTestInstanceBuilder::Build(numRows, rawBytes)); + static TColumnRecord Build(const ui32 columnId, const ui16 chunkId, const ui64 offset, const ui64 size, const ui64 recordsCount, const ui64 rawBytes) { + TColumnRecord result(TChunkMeta::TTestInstanceBuilder::Build(recordsCount, rawBytes)); result.ColumnId = columnId; result.Chunk = chunkId; result.BlobRange.Offset = offset; @@ -138,7 +138,7 @@ class TColumnRecord { } NArrow::NSplitter::TSimpleSerializationStat GetSerializationStat() const { - return NArrow::NSplitter::TSimpleSerializationStat(BlobRange.Size, Meta.GetNumRows(), Meta.GetRawBytes()); + return NArrow::NSplitter::TSimpleSerializationStat(BlobRange.Size, Meta.GetRecordsCount(), Meta.GetRawBytes()); } const TChunkMeta& GetMeta() const { diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp index bf0f8e5eaead..82cdfdab0b5d 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp @@ -77,7 +77,7 @@ TPortionDataAccessor::TPreparedBatchData PrepareForAssembleImpl(const TPortionDa } auto it = blobsData.find(rec.GetAddress()); AFL_VERIFY(it != blobsData.end())("size", blobsData.size())("address", rec.GetAddress().DebugString()); - currentAssembler->AddBlobInfo(rec.Chunk, rec.GetMeta().GetNumRows(), std::move(it->second)); + currentAssembler->AddBlobInfo(rec.Chunk, rec.GetMeta().GetRecordsCount(), std::move(it->second)); blobsData.erase(it); } } @@ -299,6 +299,201 @@ TString TPortionDataAccessor::DebugString() const { return sb << ")"; } +ui64 TPortionDataAccessor::GetColumnRawBytes(const std::set& entityIds, const bool validation /*= true*/) const { + ui64 sum = 0; + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetMeta().GetRawBytes(); + }; + AggregateIndexChunksData(aggr, PortionInfo->Records, &entityIds, validation); + return sum; +} + +ui64 TPortionDataAccessor::GetColumnBlobBytes(const std::set& entityIds, const bool validation /*= true*/) const { + ui64 sum = 0; + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetBlobRange().GetSize(); + }; + AggregateIndexChunksData(aggr, PortionInfo->Records, &entityIds, validation); + return sum; +} + +ui64 TPortionDataAccessor::GetIndexRawBytes(const std::set& entityIds, const bool validation /*= true*/) const { + ui64 sum = 0; + const auto aggr = [&](const TIndexChunk& r) { + sum += r.GetRawBytes(); + }; + AggregateIndexChunksData(aggr, PortionInfo->Indexes, &entityIds, validation); + return sum; +} + +ui64 TPortionDataAccessor::GetIndexRawBytes(const bool validation /*= true*/) const { + ui64 sum = 0; + const auto aggr = [&](const TIndexChunk& r) { + sum += r.GetRawBytes(); + }; + AggregateIndexChunksData(aggr, PortionInfo->Indexes, nullptr, validation); + return sum; +} + +std::vector TPortionDataAccessor::GetColumnChunksPointers(const ui32 columnId) const { + std::vector result; + for (auto&& c : PortionInfo->Records) { + if (c.ColumnId == columnId) { + Y_ABORT_UNLESS(c.Chunk == result.size()); + Y_ABORT_UNLESS(c.GetMeta().GetRecordsCount()); + result.emplace_back(&c); + } + } + return result; +} + +std::vector TPortionDataAccessor::BuildPages() const { + std::vector pages; + struct TPart { + public: + const TColumnRecord* Record = nullptr; + const TIndexChunk* Index = nullptr; + const ui32 RecordsCount; + TPart(const TColumnRecord* record, const ui32 recordsCount) + : Record(record) + , RecordsCount(recordsCount) { + } + TPart(const TIndexChunk* record, const ui32 recordsCount) + : Index(record) + , RecordsCount(recordsCount) { + } + }; + std::map> entities; + std::map currentCursor; + ui32 currentSize = 0; + ui32 currentId = 0; + for (auto&& i : Records) { + if (currentId != i.GetColumnId()) { + currentSize = 0; + currentId = i.GetColumnId(); + } + currentSize += i.GetMeta().GetRecordsCount(); + ++currentCursor[currentSize]; + entities[i.GetColumnId()].emplace_back(&i, i.GetMeta().GetRecordsCount()); + } + for (auto&& i : Indexes) { + if (currentId != i.GetIndexId()) { + currentSize = 0; + currentId = i.GetIndexId(); + } + currentSize += i.GetRecordsCount(); + ++currentCursor[currentSize]; + entities[i.GetIndexId()].emplace_back(&i, i.GetRecordsCount()); + } + const ui32 entitiesCount = entities.size(); + ui32 predCount = 0; + for (auto&& i : currentCursor) { + if (i.second != entitiesCount) { + continue; + } + std::vector records; + std::vector indexes; + for (auto&& c : entities) { + ui32 readyCount = 0; + while (readyCount < i.first - predCount && c.second.size()) { + if (c.second.front().Record) { + records.emplace_back(c.second.front().Record); + } else { + AFL_VERIFY(c.second.front().Index); + indexes.emplace_back(c.second.front().Index); + } + readyCount += c.second.front().RecordsCount; + c.second.pop_front(); + } + AFL_VERIFY(readyCount == i.first - predCount)("ready", readyCount)("cursor", i.first)("pred_cursor", predCount); + } + pages.emplace_back(std::move(records), std::move(indexes), i.first - predCount); + predCount = i.first; + } + for (auto&& i : entities) { + AFL_VERIFY(i.second.empty()); + } + return pages; +} + +ui64 TPortionDataAccessor::GetMinMemoryForReadColumns(const std::optional>& columnIds) const { + ui32 columnId = 0; + ui32 chunkIdx = 0; + + struct TDelta { + i64 BlobBytes = 0; + i64 RawBytes = 0; + void operator+=(const TDelta& add) { + BlobBytes += add.BlobBytes; + RawBytes += add.RawBytes; + } + }; + + std::map diffByPositions; + ui64 position = 0; + ui64 RawBytesCurrent = 0; + ui64 BlobBytesCurrent = 0; + std::optional recordsCount; + + const auto doFlushColumn = [&]() { + if (!recordsCount && position) { + recordsCount = position; + } else { + AFL_VERIFY(*recordsCount == position); + } + if (position) { + TDelta delta; + delta.RawBytes = -1 * RawBytesCurrent; + delta.BlobBytes = -1 * BlobBytesCurrent; + diffByPositions[position] += delta; + } + position = 0; + chunkIdx = 0; + RawBytesCurrent = 0; + BlobBytesCurrent = 0; + }; + + for (auto&& i : Records) { + if (columnIds && !columnIds->contains(i.GetColumnId())) { + continue; + } + if (columnId != i.GetColumnId()) { + if (columnId) { + doFlushColumn(); + } + AFL_VERIFY(i.GetColumnId() > columnId); + AFL_VERIFY(i.GetChunkIdx() == 0); + columnId = i.GetColumnId(); + } else { + AFL_VERIFY(i.GetChunkIdx() == chunkIdx + 1); + } + chunkIdx = i.GetChunkIdx(); + TDelta delta; + delta.RawBytes = -1 * RawBytesCurrent + i.GetMeta().GetRawBytes(); + delta.BlobBytes = -1 * BlobBytesCurrent + i.GetBlobRange().Size; + diffByPositions[position] += delta; + position += i.GetMeta().GetRecordsCount(); + RawBytesCurrent = i.GetMeta().GetRawBytes(); + BlobBytesCurrent = i.GetBlobRange().Size; + } + if (columnId) { + doFlushColumn(); + } + i64 maxRawBytes = 0; + TDelta current; + for (auto&& i : diffByPositions) { + current += i.second; + AFL_VERIFY(current.BlobBytes >= 0); + AFL_VERIFY(current.RawBytes >= 0); + if (maxRawBytes < current.RawBytes) { + maxRawBytes = current.RawBytes; + } + } + AFL_VERIFY(current.BlobBytes == 0)("real", current.BlobBytes); + AFL_VERIFY(current.RawBytes == 0)("real", current.RawBytes); + return maxRawBytes; +} + TConclusion> TPortionDataAccessor::TPreparedColumn::AssembleAccessor() const { Y_ABORT_UNLESS(!Blobs.empty()); diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.h b/ydb/core/tx/columnshard/engines/portions/data_accessor.h index 6a675ae125d7..bfa293c75c07 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.h +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.h @@ -18,6 +18,51 @@ class TPortionDataAccessor { const TPortionInfo* PortionInfo; public: + template + static void AggregateIndexChunksData( + const TAggregator& aggr, const std::vector& chunks, const std::set* columnIds, const bool validation) { + if (columnIds) { + auto itColumn = columnIds->begin(); + auto itRecord = chunks.begin(); + ui32 recordsInEntityCount = 0; + while (itRecord != chunks.end() && itColumn != columnIds->end()) { + if (itRecord->GetEntityId() < *itColumn) { + ++itRecord; + } else if (*itColumn < itRecord->GetEntityId()) { + AFL_VERIFY(!validation || recordsInEntityCount)("problem", "validation")("reason", "no_chunks_for_column")( + "column_id", *itColumn); + ++itColumn; + recordsInEntityCount = 0; + } else { + ++recordsInEntityCount; + aggr(*itRecord); + ++itRecord; + } + } + } else { + for (auto&& i : chunks) { + aggr(i); + } + } + } + + template + static void CheckChunksOrder(const std::vector& chunks) { + ui32 entityId = 0; + ui32 chunkIdx = 0; + for (auto&& i : chunks) { + if (entityId != i.GetEntityId()) { + AFL_VERIFY(entityId < i.GetEntityId()); + AFL_VERIFY(i.GetChunkIdx() == 0); + entityId = i.GetEntityId(); + chunkIdx = 0; + } else { + AFL_VERIFY(i.GetChunkIdx() == chunkIdx + 1); + chunkIdx = i.GetChunkIdx(); + } + } + } + TPortionDataAccessor(const TPortionInfo& portionInfo) : PortionInfo(&portionInfo) { } @@ -26,6 +71,11 @@ class TPortionDataAccessor { return PortionInfo->Records; } + ui64 GetColumnRawBytes(const std::set& entityIds, const bool validation = true) const; + ui64 GetColumnBlobBytes(const std::set& entityIds, const bool validation = true) const; + ui64 GetIndexRawBytes(const std::set& entityIds, const bool validation = true) const; + ui64 GetIndexRawBytes(const bool validation = true) const; + void FillBlobRangesByStorage(THashMap>& result, const TIndexInfo& indexInfo) const; void FillBlobRangesByStorage(THashMap>& result, const TVersionedIndex& index) const; void FillBlobIdsByStorage(THashMap>& result, const TIndexInfo& indexInfo) const; @@ -34,6 +84,8 @@ class TPortionDataAccessor { THashMap>> RestoreEntityChunks( NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; + std::vector GetColumnChunksPointers(const ui32 columnId) const; + THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const; THashMap> GetBlobIdsByStorage(const TIndexInfo& indexInfo) const { @@ -205,16 +257,16 @@ class TPortionDataAccessor { private: std::vector BlobsInfo; YDB_READONLY(ui32, ColumnId, 0); - const ui32 NumRows; - ui32 NumRowsByChunks = 0; + const ui32 RecordsCount; + ui32 RecordsCountByChunks = 0; const std::shared_ptr DataLoader; const std::shared_ptr ResultLoader; public: TColumnAssemblingInfo( - const ui32 numRows, const std::shared_ptr& dataLoader, const std::shared_ptr& resultLoader) + const ui32 recordsCount, const std::shared_ptr& dataLoader, const std::shared_ptr& resultLoader) : ColumnId(resultLoader->GetColumnId()) - , NumRows(numRows) + , RecordsCount(recordsCount) , DataLoader(dataLoader) , ResultLoader(resultLoader) { AFL_VERIFY(ResultLoader); @@ -232,16 +284,17 @@ class TPortionDataAccessor { void AddBlobInfo(const ui32 expectedChunkIdx, const ui32 expectedRecordsCount, TAssembleBlobInfo&& info) { AFL_VERIFY(expectedChunkIdx == BlobsInfo.size()); info.SetExpectedRecordsCount(expectedRecordsCount); - NumRowsByChunks += expectedRecordsCount; + RecordsCountByChunks += expectedRecordsCount; BlobsInfo.emplace_back(std::move(info)); } TPreparedColumn Compile() { if (BlobsInfo.empty()) { - BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows, DataLoader ? DataLoader->GetDefaultValue() : ResultLoader->GetDefaultValue())); + BlobsInfo.emplace_back( + TAssembleBlobInfo(RecordsCount, DataLoader ? DataLoader->GetDefaultValue() : ResultLoader->GetDefaultValue())); return TPreparedColumn(std::move(BlobsInfo), ResultLoader); } else { - AFL_VERIFY(NumRowsByChunks == NumRows)("by_chunks", NumRowsByChunks)("expected", NumRows); + AFL_VERIFY(RecordsCountByChunks == RecordsCount)("by_chunks", RecordsCountByChunks)("expected", RecordsCount); AFL_VERIFY(DataLoader); return TPreparedColumn(std::move(BlobsInfo), DataLoader); } @@ -252,6 +305,23 @@ class TPortionDataAccessor { THashMap& blobsData, const std::optional& defaultSnapshot = std::nullopt) const; TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData, const std::optional& defaultSnapshot = std::nullopt) const; + + class TPage { + private: + YDB_READONLY_DEF(std::vector, Records); + YDB_READONLY_DEF(std::vector, Indexes); + YDB_READONLY(ui32, RecordsCount, 0); + + public: + TPage(std::vector&& records, std::vector&& indexes, const ui32 recordsCount) + : Records(std::move(records)) + , Indexes(std::move(indexes)) + , RecordsCount(recordsCount) { + } + }; + + std::vector BuildPages() const; + ui64 GetMinMemoryForReadColumns(const std::optional>& columnIds) const; }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 410196fb8c07..4a0c75e1851b 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -1,5 +1,6 @@ #include "column_record.h" #include "constructor.h" +#include "data_accessor.h" #include "portion_info.h" #include @@ -9,24 +10,6 @@ namespace NKikimr::NOlap { -ui64 TPortionInfo::GetColumnRawBytes(const std::set& entityIds, const bool validation) const { - ui64 sum = 0; - const auto aggr = [&](const TColumnRecord& r) { - sum += r.GetMeta().GetRawBytes(); - }; - AggregateIndexChunksData(aggr, Records, &entityIds, validation); - return sum; -} - -ui64 TPortionInfo::GetColumnBlobBytes(const std::set& entityIds, const bool validation) const { - ui64 sum = 0; - const auto aggr = [&](const TColumnRecord& r) { - sum += r.GetBlobRange().GetSize(); - }; - AggregateIndexChunksData(aggr, Records, &entityIds, validation); - return sum; -} - ui64 TPortionInfo::GetColumnRawBytes() const { AFL_VERIFY(Precalculated); return PrecalculatedColumnRawBytes; @@ -37,28 +20,10 @@ ui64 TPortionInfo::GetColumnBlobBytes() const { return PrecalculatedColumnBlobBytes; } -ui64 TPortionInfo::GetIndexRawBytes(const std::set& entityIds, const bool validation) const { - ui64 sum = 0; - const auto aggr = [&](const TIndexChunk& r) { - sum += r.GetRawBytes(); - }; - AggregateIndexChunksData(aggr, Indexes, &entityIds, validation); - return sum; -} - -ui64 TPortionInfo::GetIndexRawBytes(const bool validation) const { - ui64 sum = 0; - const auto aggr = [&](const TIndexChunk& r) { - sum += r.GetRawBytes(); - }; - AggregateIndexChunksData(aggr, Indexes, nullptr, validation); - return sum; -} - TString TPortionInfo::DebugString(const bool withDetails) const { TStringBuilder sb; sb << "(portion_id:" << PortionId << ";" - << "path_id:" << PathId << ";records_count:" << NumRows() + << "path_id:" << PathId << ";records_count:" << GetRecordsCount() << ";" "min_schema_snapshot:(" << MinSnapshotDeprecated.DebugString() @@ -92,18 +57,6 @@ TString TPortionInfo::DebugString(const bool withDetails) const { return sb << ")"; } -std::vector TPortionInfo::GetColumnChunksPointers(const ui32 columnId) const { - std::vector result; - for (auto&& c : Records) { - if (c.ColumnId == columnId) { - Y_ABORT_UNLESS(c.Chunk == result.size()); - Y_ABORT_UNLESS(c.GetMeta().GetNumRows()); - result.emplace_back(&c); - } - } - return result; -} - void TPortionInfo::RemoveFromDatabase(IDbWrapper& db) const { db.ErasePortion(*this); for (auto& record : Records) { @@ -127,75 +80,6 @@ void TPortionInfo::SaveToDatabase(IDbWrapper& db, const ui32 firstPKColumnId, co } } -std::vector TPortionInfo::BuildPages() const { - std::vector pages; - struct TPart { - public: - const TColumnRecord* Record = nullptr; - const TIndexChunk* Index = nullptr; - const ui32 RecordsCount; - TPart(const TColumnRecord* record, const ui32 recordsCount) - : Record(record) - , RecordsCount(recordsCount) { - } - TPart(const TIndexChunk* record, const ui32 recordsCount) - : Index(record) - , RecordsCount(recordsCount) { - } - }; - std::map> entities; - std::map currentCursor; - ui32 currentSize = 0; - ui32 currentId = 0; - for (auto&& i : Records) { - if (currentId != i.GetColumnId()) { - currentSize = 0; - currentId = i.GetColumnId(); - } - currentSize += i.GetMeta().GetNumRows(); - ++currentCursor[currentSize]; - entities[i.GetColumnId()].emplace_back(&i, i.GetMeta().GetNumRows()); - } - for (auto&& i : Indexes) { - if (currentId != i.GetIndexId()) { - currentSize = 0; - currentId = i.GetIndexId(); - } - currentSize += i.GetRecordsCount(); - ++currentCursor[currentSize]; - entities[i.GetIndexId()].emplace_back(&i, i.GetRecordsCount()); - } - const ui32 entitiesCount = entities.size(); - ui32 predCount = 0; - for (auto&& i : currentCursor) { - if (i.second != entitiesCount) { - continue; - } - std::vector records; - std::vector indexes; - for (auto&& c : entities) { - ui32 readyCount = 0; - while (readyCount < i.first - predCount && c.second.size()) { - if (c.second.front().Record) { - records.emplace_back(c.second.front().Record); - } else { - AFL_VERIFY(c.second.front().Index); - indexes.emplace_back(c.second.front().Index); - } - readyCount += c.second.front().RecordsCount; - c.second.pop_front(); - } - AFL_VERIFY(readyCount == i.first - predCount)("ready", readyCount)("cursor", i.first)("pred_cursor", predCount); - } - pages.emplace_back(std::move(records), std::move(indexes), i.first - predCount); - predCount = i.first; - } - for (auto&& i : entities) { - AFL_VERIFY(i.second.empty()); - } - return pages; -} - ui64 TPortionInfo::GetMetadataMemorySize() const { return sizeof(TPortionInfo) + Records.size() * (sizeof(TColumnRecord) + 8) + Indexes.size() * sizeof(TIndexChunk) + BlobIds.size() * sizeof(TUnifiedBlobId) - sizeof(TPortionMeta) + Meta.GetMetadataMemorySize(); @@ -350,8 +234,8 @@ void TPortionInfo::ReorderChunks() { } void TPortionInfo::FullValidation() const { - CheckChunksOrder(Records); - CheckChunksOrder(Indexes); + TPortionDataAccessor::CheckChunksOrder(Records); + TPortionDataAccessor::CheckChunksOrder(Indexes); AFL_VERIFY(PathId); AFL_VERIFY(PortionId); AFL_VERIFY(MinSnapshotDeprecated.Valid()); @@ -372,84 +256,6 @@ void TPortionInfo::FullValidation() const { } } -ui64 TPortionInfo::GetMinMemoryForReadColumns(const std::optional>& columnIds) const { - ui32 columnId = 0; - ui32 chunkIdx = 0; - - struct TDelta { - i64 BlobBytes = 0; - i64 RawBytes = 0; - void operator+=(const TDelta& add) { - BlobBytes += add.BlobBytes; - RawBytes += add.RawBytes; - } - }; - - std::map diffByPositions; - ui64 position = 0; - ui64 RawBytesCurrent = 0; - ui64 BlobBytesCurrent = 0; - std::optional recordsCount; - - const auto doFlushColumn = [&]() { - if (!recordsCount && position) { - recordsCount = position; - } else { - AFL_VERIFY(*recordsCount == position); - } - if (position) { - TDelta delta; - delta.RawBytes = -1 * RawBytesCurrent; - delta.BlobBytes = -1 * BlobBytesCurrent; - diffByPositions[position] += delta; - } - position = 0; - chunkIdx = 0; - RawBytesCurrent = 0; - BlobBytesCurrent = 0; - }; - - for (auto&& i : Records) { - if (columnIds && !columnIds->contains(i.GetColumnId())) { - continue; - } - if (columnId != i.GetColumnId()) { - if (columnId) { - doFlushColumn(); - } - AFL_VERIFY(i.GetColumnId() > columnId); - AFL_VERIFY(i.GetChunkIdx() == 0); - columnId = i.GetColumnId(); - } else { - AFL_VERIFY(i.GetChunkIdx() == chunkIdx + 1); - } - chunkIdx = i.GetChunkIdx(); - TDelta delta; - delta.RawBytes = -1 * RawBytesCurrent + i.GetMeta().GetRawBytes(); - delta.BlobBytes = -1 * BlobBytesCurrent + i.GetBlobRange().Size; - diffByPositions[position] += delta; - position += i.GetMeta().GetNumRows(); - RawBytesCurrent = i.GetMeta().GetRawBytes(); - BlobBytesCurrent = i.GetBlobRange().Size; - } - if (columnId) { - doFlushColumn(); - } - i64 maxRawBytes = 0; - TDelta current; - for (auto&& i : diffByPositions) { - current += i.second; - AFL_VERIFY(current.BlobBytes >= 0); - AFL_VERIFY(current.RawBytes >= 0); - if (maxRawBytes < current.RawBytes) { - maxRawBytes = current.RawBytes; - } - } - AFL_VERIFY(current.BlobBytes == 0)("real", current.BlobBytes); - AFL_VERIFY(current.RawBytes == 0)("real", current.RawBytes); - return maxRawBytes; -} - ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoConstructor& portion) { if (!CurrentSchema || portion.GetMinSnapshotDeprecatedVerified() != LastSnapshot) { CurrentSchema = portion.GetSchema(VersionedIndex); @@ -486,7 +292,7 @@ void TPortionInfo::Precalculate() { PrecalculatedColumnRawBytes += r.GetMeta().GetRawBytes(); PrecalculatedColumnBlobBytes += r.BlobRange.GetSize(); }; - AggregateIndexChunksData(aggr, Records, nullptr, true); + TPortionDataAccessor::AggregateIndexChunksData(aggr, Records, nullptr, true); } } diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 29942b314aec..ccf6727c992e 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -92,50 +92,6 @@ class TPortionInfo { std::vector BlobIds; TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TPortionInfo& proto); - template - static void CheckChunksOrder(const std::vector& chunks) { - ui32 entityId = 0; - ui32 chunkIdx = 0; - for (auto&& i : chunks) { - if (entityId != i.GetEntityId()) { - AFL_VERIFY(entityId < i.GetEntityId()); - AFL_VERIFY(i.GetChunkIdx() == 0); - entityId = i.GetEntityId(); - chunkIdx = 0; - } else { - AFL_VERIFY(i.GetChunkIdx() == chunkIdx + 1); - chunkIdx = i.GetChunkIdx(); - } - } - } - - template - static void AggregateIndexChunksData( - const TAggregator& aggr, const std::vector& chunks, const std::set* columnIds, const bool validation) { - if (columnIds) { - auto itColumn = columnIds->begin(); - auto itRecord = chunks.begin(); - ui32 recordsInEntityCount = 0; - while (itRecord != chunks.end() && itColumn != columnIds->end()) { - if (itRecord->GetEntityId() < *itColumn) { - ++itRecord; - } else if (*itColumn < itRecord->GetEntityId()) { - AFL_VERIFY(!validation || recordsInEntityCount)("problem", "validation")("reason", "no_chunks_for_column")( - "column_id", *itColumn); - ++itColumn; - recordsInEntityCount = 0; - } else { - ++recordsInEntityCount; - aggr(*itRecord); - ++itRecord; - } - } - } else { - for (auto&& i : chunks) { - aggr(i); - } - } - } std::vector Records; public: @@ -147,8 +103,6 @@ class TPortionInfo { return GetMeta().GetCompactionLevel(); } - ui64 GetMinMemoryForReadColumns(const std::optional>& columnIds) const; - bool NeedShardingFilter(const TGranuleShardingInfo& shardingInfo) const; ui64 GetChunksCount() const { @@ -232,6 +186,13 @@ class TPortionInfo { RuntimeFeatures &= (Max() - (TRuntimeFeatures)feature); } + TString GetTierNameDef(const TString& defaultTierName) const { + if (GetMeta().GetTierName()) { + return GetMeta().GetTierName(); + } + return defaultTierName; + } + bool HasRuntimeFeature(const ERuntimeFeature feature) const { if (feature == ERuntimeFeature::Optimized) { if ((RuntimeFeatures & (TRuntimeFeatures)feature)) { @@ -266,32 +227,9 @@ class TPortionInfo { ui64 GetTxVolume() const; // fake-correct method for determ volume on rewrite this portion in transaction progress ui64 GetMetadataMemorySize() const; - class TPage { - private: - YDB_READONLY_DEF(std::vector, Records); - YDB_READONLY_DEF(std::vector, Indexes); - YDB_READONLY(ui32, RecordsCount, 0); - - public: - TPage(std::vector&& records, std::vector&& indexes, const ui32 recordsCount) - : Records(std::move(records)) - , Indexes(std::move(indexes)) - , RecordsCount(recordsCount) { - } - }; - - TString GetTierNameDef(const TString& defaultTierName) const { - if (GetMeta().GetTierName()) { - return GetMeta().GetTierName(); - } - return defaultTierName; - } - static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TPortionInfo& proto, const TIndexInfo& indexInfo); void SerializeToProto(NKikimrColumnShardDataSharingProto::TPortionInfo& proto) const; - std::vector BuildPages() const; - const std::vector& GetRecords() const { return Records; } @@ -349,8 +287,6 @@ class TPortionInfo { static constexpr const ui32 BLOB_BYTES_LIMIT = 8 * 1024 * 1024; - std::vector GetColumnChunksPointers(const ui32 columnId) const; - std::set GetColumnIds() const { std::set result; for (auto&& i : Records) { @@ -529,12 +465,6 @@ class TPortionInfo { return result; } - ui32 NumRows() const { - return GetRecordsCount(); - } - - ui64 GetIndexRawBytes(const std::set& columnIds, const bool validation = true) const; - ui64 GetIndexRawBytes(const bool validation = true) const; ui64 GetIndexBlobBytes() const noexcept { ui64 sum = 0; for (const auto& rec : Indexes) { @@ -543,10 +473,15 @@ class TPortionInfo { return sum; } - ui64 GetColumnRawBytes(const std::set& columnIds, const bool validation = true) const; - ui64 GetColumnRawBytes() const; + ui64 GetIndexRawBytes() const noexcept { + ui64 sum = 0; + for (const auto& rec : Indexes) { + sum += rec.GetRawBytes(); + } + return sum; + } - ui64 GetColumnBlobBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetColumnRawBytes() const; ui64 GetColumnBlobBytes() const; ui64 GetTotalBlobBytes() const noexcept { diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index 1723bc2fbb51..6242c392fe55 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -65,7 +65,7 @@ std::vector> TReadPortionInfoWithBlobs::GetEn bool TReadPortionInfoWithBlobs::ExtractColumnChunks( const ui32 entityId, std::vector& records, std::vector>& chunks) { - records = GetPortionInfo().GetColumnChunksPointers(entityId); + records = TPortionDataAccessor(GetPortionInfo()).GetColumnChunksPointers(entityId); if (records.empty()) { return false; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index c3cb0ea27b84..69801d1ff9e7 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -63,23 +63,23 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob if (columnChunks.empty()) { continue; } - auto itFilter = cFilter.GetIterator(false, Portion->NumRows()); + auto itFilter = cFilter.GetIterator(false, Portion->GetRecordsCount()); bool itFinished = false; for (auto&& c : columnChunks) { AFL_VERIFY(!itFinished); - if (!itFilter.IsBatchForSkip(c->GetMeta().GetNumRows())) { + if (!itFilter.IsBatchForSkip(c->GetMeta().GetRecordsCount())) { auto reading = blobsAction.GetReading(Portion->GetColumnStorageId(c->GetColumnId(), Schema->GetIndexInfo())); reading->SetIsBackgroundProcess(false); reading->AddRange(Portion->RestoreBlobRange(c->BlobRange)); ++fetchedChunks; } else { - defaultBlocks.emplace(c->GetAddress(), TPortionDataAccessor::TAssembleBlobInfo(c->GetMeta().GetNumRows(), + defaultBlocks.emplace(c->GetAddress(), TPortionDataAccessor::TAssembleBlobInfo(c->GetMeta().GetRecordsCount(), Schema->GetExternalDefaultValueVerified(c->GetColumnId()))); ++nullChunks; } - itFinished = !itFilter.Next(c->GetMeta().GetNumRows()); + itFinished = !itFilter.Next(c->GetMeta().GetRecordsCount()); } - AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->NumRows()); + AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->GetRecordsCount()); } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks)( "reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index c64b9a0cc3ca..dc43143300d1 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -294,9 +294,9 @@ class TPortionDataSource: public IDataSource { } // result.InsertValue("sequential_columns", JoinSeq(",", SequentialEntityIds)); if (SequentialEntityIds.size()) { - result.InsertValue("min_memory_seq", Portion->GetMinMemoryForReadColumns(SequentialEntityIds)); - result.InsertValue("min_memory_seq_blobs", Portion->GetColumnBlobBytes(SequentialEntityIds)); - result.InsertValue("in_mem", Portion->GetColumnRawBytes(columns, false)); + result.InsertValue("min_memory_seq", TPortionDataAccessor(*Portion).GetMinMemoryForReadColumns(SequentialEntityIds)); + result.InsertValue("min_memory_seq_blobs", TPortionDataAccessor(*Portion).GetColumnBlobBytes(SequentialEntityIds)); + result.InsertValue("in_mem", TPortionDataAccessor(*Portion).GetColumnRawBytes(columns, false)); } result.InsertValue("columns_in_mem", JoinSeq(",", columns)); result.InsertValue("portion_id", Portion->GetPortionId()); @@ -361,10 +361,11 @@ class TPortionDataSource: public IDataSource { selectedInMem.emplace(i); } } - result = Portion->GetMinMemoryForReadColumns(selectedSeq) + Portion->GetColumnBlobBytes(selectedSeq, false) + - Portion->GetColumnRawBytes(selectedInMem, false); + result = TPortionDataAccessor(*Portion).GetMinMemoryForReadColumns(selectedSeq) + + TPortionDataAccessor(*Portion)->GetColumnBlobBytes(selectedSeq, false) + + TPortionDataAccessor(*Portion)->GetColumnRawBytes(selectedInMem, false); } else { - result = Portion->GetColumnRawBytes(columnsIds, false); + result = TPortionDataAccessor(*Portion)->GetColumnRawBytes(columnsIds, false); } FingerprintedData.emplace(fp, result); return result; diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp index 1cd56af82894..8bce9a6bfee4 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp @@ -10,7 +10,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[1], prod); NArrow::Append(*builders[2], ReadMetadata->TabletId); - NArrow::Append(*builders[3], portion.NumRows()); + NArrow::Append(*builders[3], portion.GetRecordsCount()); NArrow::Append(*builders[4], portion.GetColumnRawBytes()); NArrow::Append(*builders[5], portion.GetIndexRawBytes()); NArrow::Append(*builders[6], portion.GetColumnBlobBytes()); diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.h b/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.h index 7bf8aa895f69..e803df700f7b 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.h +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.h @@ -34,13 +34,13 @@ class TPortionCategoryCounters { } void AddPortion(const std::shared_ptr& p) { - RecordsCount->Add(p->NumRows()); + RecordsCount->Add(p->GetRecordsCount()); Count->Add(1); Bytes->Add(p->GetTotalBlobBytes()); } void RemovePortion(const std::shared_ptr& p) { - RecordsCount->Remove(p->NumRows()); + RecordsCount->Remove(p->GetRecordsCount()); Count->Remove(1); Bytes->Remove(p->GetTotalBlobBytes()); } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.h b/ydb/core/tx/columnshard/engines/storage/granule/granule.h index 5d75e8401b98..0d21f30dabcb 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.h @@ -27,7 +27,7 @@ class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { ColumnPortionsSize += info.GetColumnBlobBytes(); TotalPortionsSize += info.GetTotalBlobBytes(); MetadataMemoryPortionsSize += info.GetMetadataMemorySize(); - RecordsCount += info.NumRows(); + RecordsCount += info.GetRecordsCount(); ++PortionsCount; } @@ -38,7 +38,7 @@ class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { Y_ABORT_UNLESS(ColumnPortionsSize >= 0); TotalPortionsSize -= info.GetTotalBlobBytes(); Y_ABORT_UNLESS(TotalPortionsSize >= 0); - RecordsCount -= info.NumRows(); + RecordsCount -= info.GetRecordsCount(); Y_ABORT_UNLESS(RecordsCount >= 0); --PortionsCount; Y_ABORT_UNLESS(PortionsCount >= 0); diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h index 28da42f25991..8de02a1bd463 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h @@ -43,12 +43,12 @@ class TSimplePortionsGroupInfo { void AddPortion(const std::shared_ptr& p) { Bytes += p->GetTotalBlobBytes(); Count += 1; - RecordsCount += p->NumRows(); + RecordsCount += p->GetRecordsCount(); } void RemovePortion(const std::shared_ptr& p) { Bytes -= p->GetTotalBlobBytes(); Count -= 1; - RecordsCount -= p->NumRows(); + RecordsCount -= p->GetRecordsCount(); AFL_VERIFY(Bytes >= 0); AFL_VERIFY(Count >= 0); AFL_VERIFY(RecordsCount >= 0); diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.h index 553bd195ec39..f9a3c61cf244 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.h @@ -31,12 +31,12 @@ class TSimplePortionsGroupInfo { void AddPortion(const std::shared_ptr& p) { Bytes += p->GetTotalBlobBytes(); Count += 1; - RecordsCount += p->NumRows(); + RecordsCount += p->GetRecordsCount(); } void RemovePortion(const std::shared_ptr& p) { Bytes -= p->GetTotalBlobBytes(); Count -= 1; - RecordsCount -= p->NumRows(); + RecordsCount -= p->GetRecordsCount(); AFL_VERIFY(Bytes >= 0); AFL_VERIFY(Count >= 0); AFL_VERIFY(RecordsCount >= 0); diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/counters/counters.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/counters/counters.h index f7020d3de83a..0f04067f8ef4 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/counters/counters.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/counters/counters.h @@ -35,13 +35,13 @@ class TPortionCategoryCounters { } void AddPortion(const std::shared_ptr& p) { - RecordsCount->Add(p->NumRows()); + RecordsCount->Add(p->GetRecordsCount()); Count->Add(1); Bytes->Add(p->GetTotalBlobBytes()); } void RemovePortion(const std::shared_ptr& p) { - RecordsCount->Remove(p->NumRows()); + RecordsCount->Remove(p->GetRecordsCount()); Count->Remove(1); Bytes->Remove(p->GetTotalBlobBytes()); } diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.h b/ydb/core/tx/columnshard/normalizer/portion/chunks.h index c8a09669c7b8..46c1462a8c86 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.h +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.h @@ -57,7 +57,7 @@ namespace NKikimr::NOlap { }; class TUpdate { - YDB_ACCESSOR(ui64, NumRows, 0); + YDB_ACCESSOR(ui64, RecordsCount, 0); YDB_ACCESSOR(ui64, RawBytes, 0); }; diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.cpp b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.cpp index 6e680b91447b..b0f368afb1aa 100644 --- a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.cpp +++ b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.cpp @@ -8,7 +8,7 @@ TSimpleChunkMeta::TSimpleChunkMeta( const std::shared_ptr& column) { Y_ABORT_UNLESS(column); Y_ABORT_UNLESS(column->GetRecordsCount()); - NumRows = column->GetRecordsCount(); + RecordsCount = column->GetRecordsCount(); RawBytes = column->GetRawSizeVerified(); } diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h index 53de4f2b3b61..6b9964a5d91e 100644 --- a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h +++ b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h @@ -14,7 +14,7 @@ namespace NKikimr::NOlap { class TSimpleChunkMeta { protected: - ui32 NumRows = 0; + ui32 RecordsCount = 0; ui32 RawBytes = 0; TSimpleChunkMeta() = default; public: @@ -24,11 +24,8 @@ class TSimpleChunkMeta { return sizeof(ui32) + sizeof(ui32); } - ui32 GetNumRows() const { - return NumRows; - } ui32 GetRecordsCount() const { - return NumRows; + return RecordsCount; } ui32 GetRawBytes() const { return RawBytes; From 7f81b01217a682a54f55861bdecfb399ab3cc315 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 16:09:45 +0300 Subject: [PATCH 19/31] fix --- .../tx/columnshard/engines/insert_table/insert_table.cpp | 8 ++++---- ydb/core/tx/columnshard/engines/portions/constructor.h | 2 +- ydb/core/tx/columnshard/engines/portions/portion_info.h | 2 +- .../columnshard/engines/reader/sys_view/chunks/chunks.cpp | 2 +- ydb/core/tx/columnshard/engines/storage/chunks/column.h | 2 +- ydb/core/tx/columnshard/normalizer/portion/chunks.cpp | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp index f372a5367761..d4d1213eee84 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp @@ -30,7 +30,7 @@ TInsertionSummary::TCounters TInsertTable::Commit( continue; } - counters.Rows += data->GetMeta().GetNumRows(); + counters.Rows += data->GetMeta().GetRecordsCount(); counters.RawBytes += data->GetMeta().GetRawBytes(); counters.Bytes += data->BlobSize(); @@ -59,7 +59,7 @@ TInsertionSummary::TCounters TInsertTable::Commit( TInsertionSummary::TCounters TInsertTable::CommitEphemeral(IDbWrapper& dbTable, TCommittedData&& data) { TInsertionSummary::TCounters counters; - counters.Rows += data.GetMeta().GetNumRows(); + counters.Rows += data.GetMeta().GetRecordsCount(); counters.RawBytes += data.GetMeta().GetRawBytes(); counters.Bytes += data.BlobSize(); @@ -156,7 +156,7 @@ std::vector TInsertTable::Read(ui64 pathId, const std::optional< if (pkRangesFilter && pkRangesFilter->IsPortionInPartialUsage(start, finish) == TPKRangeFilter::EUsageClass::DontUsage) { continue; } - result.emplace_back(TCommittedBlob(data.GetBlobRange(), data.GetSnapshot(), data.GetInsertWriteId(), data.GetSchemaVersion(), data.GetMeta().GetNumRows(), + result.emplace_back(TCommittedBlob(data.GetBlobRange(), data.GetSnapshot(), data.GetInsertWriteId(), data.GetSchemaVersion(), data.GetMeta().GetRecordsCount(), start, finish, data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete, data.GetMeta().GetSchemaSubset())); } } @@ -170,7 +170,7 @@ std::vector TInsertTable::Read(ui64 pathId, const std::optional< if (pkRangesFilter && pkRangesFilter->IsPortionInPartialUsage(start, finish) == TPKRangeFilter::EUsageClass::DontUsage) { continue; } - result.emplace_back(TCommittedBlob(data.GetBlobRange(), writeId, data.GetSchemaVersion(), data.GetMeta().GetNumRows(), start, finish, + result.emplace_back(TCommittedBlob(data.GetBlobRange(), writeId, data.GetSchemaVersion(), data.GetMeta().GetRecordsCount(), start, finish, data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete, data.GetMeta().GetSchemaSubset())); } } diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.h b/ydb/core/tx/columnshard/engines/portions/constructor.h index e86db08d493a..acd4652e1ca3 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.h +++ b/ydb/core/tx/columnshard/engines/portions/constructor.h @@ -249,7 +249,7 @@ class TPortionInfoConstructor { std::optional columnIdFirst; for (auto&& i : Records) { if (!columnIdFirst || *columnIdFirst == i.ColumnId) { - result += i.GetMeta().GetNumRows(); + result += i.GetMeta().GetRecordsCount(); columnIdFirst = i.ColumnId; } } diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index ccf6727c992e..8fb3fa6a8ff4 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -456,7 +456,7 @@ class TPortionInfo { std::optional columnIdFirst; for (auto&& i : Records) { if (!columnIdFirst || *columnIdFirst == i.ColumnId) { - result += i.GetMeta().GetNumRows(); + result += i.GetMeta().GetRecordsCount(); columnIdFirst = i.ColumnId; } else { break; diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp index 3feda5330345..c568ad0e82ef 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -35,7 +35,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[0], portion.GetPathId()); NArrow::Append(*builders[1], prodView); NArrow::Append(*builders[2], ReadMetadata->TabletId); - NArrow::Append(*builders[3], r->GetMeta().GetNumRows()); + NArrow::Append(*builders[3], r->GetMeta().GetRecordsCount()); NArrow::Append(*builders[4], r->GetMeta().GetRawBytes()); NArrow::Append(*builders[5], portion.GetPortionId()); NArrow::Append(*builders[6], r->GetChunkIdx()); diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.h b/ydb/core/tx/columnshard/engines/storage/chunks/column.h index f7a3c33382a8..7d010d3c4158 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/column.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.h @@ -20,7 +20,7 @@ class TChunkPreparation: public IPortionColumnChunk { return Data; } virtual ui32 DoGetRecordsCountImpl() const override { - return Record.GetMeta().GetNumRows(); + return Record.GetMeta().GetRecordsCount(); } virtual ui64 DoGetRawBytesImpl() const override { return Record.GetMeta().GetRawBytes(); diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp index 53004cb86752..fcd56dbb0515 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp @@ -23,7 +23,7 @@ class TChunksNormalizer::TNormalizerResult: public INormalizerChanges { for (auto&& chunkInfo : Chunks) { NKikimrTxColumnShard::TIndexColumnMeta metaProto = chunkInfo.GetMetaProto(); - metaProto.SetNumRows(chunkInfo.GetUpdate().GetNumRows()); + metaProto.SetNumRows(chunkInfo.GetUpdate().GetRecordsCount()); metaProto.SetRawBytes(chunkInfo.GetUpdate().GetRawBytes()); const auto& key = chunkInfo.GetKey(); @@ -64,7 +64,7 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { auto batch = assembleBlob.BuildRecordBatch(*columnLoader).DetachResult(); Y_ABORT_UNLESS(!!batch); - chunkInfo.MutableUpdate().SetNumRows(batch->GetRecordsCount()); + chunkInfo.MutableUpdate().SetRecordsCount(batch->GetRecordsCount()); chunkInfo.MutableUpdate().SetRawBytes(batch->GetRawSizeVerified()); } From 801d14f3e34d20bf0c54299f1a112264ff431148 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 16:10:46 +0300 Subject: [PATCH 20/31] fix --- ydb/core/tx/columnshard/engines/portions/data_accessor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp index 82cdfdab0b5d..6fb17478ab6c 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp @@ -367,7 +367,7 @@ std::vector TPortionDataAccessor::BuildPages() cons std::map currentCursor; ui32 currentSize = 0; ui32 currentId = 0; - for (auto&& i : Records) { + for (auto&& i : PortionInfo->Records) { if (currentId != i.GetColumnId()) { currentSize = 0; currentId = i.GetColumnId(); @@ -376,7 +376,7 @@ std::vector TPortionDataAccessor::BuildPages() cons ++currentCursor[currentSize]; entities[i.GetColumnId()].emplace_back(&i, i.GetMeta().GetRecordsCount()); } - for (auto&& i : Indexes) { + for (auto&& i : PortionInfo->Indexes) { if (currentId != i.GetIndexId()) { currentSize = 0; currentId = i.GetIndexId(); @@ -453,7 +453,7 @@ ui64 TPortionDataAccessor::GetMinMemoryForReadColumns(const std::optionalRecords) { if (columnIds && !columnIds->contains(i.GetColumnId())) { continue; } From c69cfe3c1b7f2dc0c84e65c7d5e8901b8b325537 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 16:22:39 +0300 Subject: [PATCH 21/31] fix --- .../columnshard/engines/portions/read_with_blobs.cpp | 2 +- .../engines/reader/plain_reader/iterator/source.h | 10 +++++----- .../engines/storage/granule/portions_index.h | 3 ++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index 6242c392fe55..4baed92b272d 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -91,7 +91,7 @@ std::optional TReadPortionInfoWithBlobs::SyncP return {}; } NYDBTest::TControllers::GetColumnShardController()->OnPortionActualization(source.PortionInfo); - auto pages = source.PortionInfo.BuildPages(); + auto pages = TPortionDataAccessor(source.PortionInfo).BuildPages(); std::vector pageSizes; for (auto&& p : pages) { pageSizes.emplace_back(p.GetRecordsCount()); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index dc43143300d1..d19c9e527e7a 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -362,21 +362,21 @@ class TPortionDataSource: public IDataSource { } } result = TPortionDataAccessor(*Portion).GetMinMemoryForReadColumns(selectedSeq) + - TPortionDataAccessor(*Portion)->GetColumnBlobBytes(selectedSeq, false) + - TPortionDataAccessor(*Portion)->GetColumnRawBytes(selectedInMem, false); + TPortionDataAccessor(*Portion).GetColumnBlobBytes(selectedSeq, false) + + TPortionDataAccessor(*Portion).GetColumnRawBytes(selectedInMem, false); } else { - result = TPortionDataAccessor(*Portion)->GetColumnRawBytes(columnsIds, false); + result = TPortionDataAccessor(*Portion).GetColumnRawBytes(columnsIds, false); } FingerprintedData.emplace(fp, result); return result; } virtual ui64 GetColumnBlobBytes(const std::set& columnsIds) const override { - return Portion->GetColumnBlobBytes(columnsIds, false); + return TPortionDataAccessor(*Portion).GetColumnBlobBytes(columnsIds, false); } virtual ui64 GetIndexRawBytes(const std::set& indexIds) const override { - return Portion->GetIndexRawBytes(indexIds, false); + return TPortionDataAccessor(*Portion).GetIndexRawBytes(indexIds, false); } const TPortionInfo& GetPortionInfo() const { diff --git a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h index 981943dc4dab..10eb96a7b33b 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace NKikimr::NOlap { class TGranuleMeta; @@ -17,7 +18,7 @@ class TPortionInfoStat { public: TPortionInfoStat(const std::shared_ptr& portionInfo) : PortionInfo(portionInfo) - , MinRawBytes(PortionInfo->GetMinMemoryForReadColumns({})) + , MinRawBytes(TPortionDataAccessor(*PortionInfo).GetMinMemoryForReadColumns({})) , BlobBytes(PortionInfo->GetTotalBlobBytes()) { From 556837505b52b28e8fb049be4b2a48d394618570 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 16:28:20 +0300 Subject: [PATCH 22/31] fixes --- .../engines/reader/plain_reader/iterator/source.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index 69801d1ff9e7..ca8e7763016c 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -59,7 +59,7 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob ui32 fetchedChunks = 0; ui32 nullChunks = 0; for (auto&& i : columnIds) { - auto columnChunks = Portion->GetColumnChunksPointers(i); + auto columnChunks = TPortionDataAccessor(*Portion).GetColumnChunksPointers(i); if (columnChunks.empty()) { continue; } @@ -152,7 +152,7 @@ void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& in THashMap> indexBlobs; std::set indexIds = indexChecker->GetIndexIds(); // NActors::TLogContextGuard gLog = NActors::TLogContextBuilder::Build()("records_count", GetRecordsCount())("portion_id", Portion->GetAddress().DebugString()); - std::vector pages = Portion->BuildPages(); + std::vector pages = TPortionDataAccessor(*Portion).BuildPages(); NArrow::TColumnFilter constructor = NArrow::TColumnFilter::BuildAllowFilter(); for (auto&& p : pages) { for (auto&& i : p.GetIndexes()) { From efbe70a08bb9dc06b03a5e416ab942f4f393a0d9 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 16:35:42 +0300 Subject: [PATCH 23/31] correction --- ydb/core/tx/columnshard/engines/portions/data_accessor.h | 8 ++++++++ ydb/core/tx/columnshard/engines/portions/portion_info.h | 3 --- .../tx/columnshard/engines/portions/read_with_blobs.cpp | 2 +- .../columnshard/engines/reader/sys_view/chunks/chunks.cpp | 6 +++--- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.h b/ydb/core/tx/columnshard/engines/portions/data_accessor.h index bfa293c75c07..7677a066cc0a 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.h +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.h @@ -320,6 +320,14 @@ class TPortionDataAccessor { } }; + const std::vector& GetRecords() const { + return PortionInfo->Records; + } + + const std::vector& GetIndexes() const { + return PortionInfo->Indexes; + } + std::vector BuildPages() const; ui64 GetMinMemoryForReadColumns(const std::optional>& columnIds) const; }; diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 8fb3fa6a8ff4..4e080283430f 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -316,9 +316,6 @@ class TPortionInfo { bool ValidSnapshotInfo() const { return MinSnapshotDeprecated.Valid() && PathId && PortionId; } - size_t NumChunks() const { - return Records.size(); - } TString DebugString(const bool withDetails = false) const; diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index 4baed92b272d..3f7d0b49146d 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -18,7 +18,7 @@ void TReadPortionInfoWithBlobs::RestoreChunk(const std::shared_ptr> TReadPortionInfoWithBlobs::RestoreBatch( const ISnapshotSchema& data, const ISnapshotSchema& resultSchema, const std::set& seqColumns) const { THashMap blobs; - for (auto&& i : PortionInfo.GetRecords()) { + for (auto&& i : TPortionDataAccessor(PortionInfo).GetRecords()) { blobs[i.GetAddress()] = GetBlobByAddressVerified(i.ColumnId, i.Chunk); Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp index c568ad0e82ef..a70a489fd53b 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -21,7 +21,7 @@ void TStatsIterator::AppendStats(const std::vector records; - for (auto&& r : portion.GetRecords()) { + for (auto&& r : TPortionDataAccessor(portion).GetRecords()) { records.emplace_back(&r); } if (Reverse) { @@ -133,7 +133,7 @@ std::shared_ptr>& builders, NAbstract::TGranuleMetaView& granule) const { ui64 recordsCount = 0; while (auto portion = granule.PopFrontPortion()) { - recordsCount += portion->GetRecords().size() + portion->GetIndexes().size(); + recordsCount += TPortionDataAccessor(*portion).GetRecords().size() + TPortionDataAccessor(*portion).GetIndexes().size(); AppendStats(builders, *portion); if (recordsCount > 10000) { break; @@ -145,7 +145,7 @@ bool TStatsIterator::AppendStats(const std::vectorGetRecords().size() + portion->GetIndexes().size(); + recordsCount += TPortionDataAccessor(*portion).GetRecords().size() + TPortionDataAccessor(*portion).GetIndexes().size(); if (recordsCount > 10000) { break; } From 85202a25c8e05dd7d553e3044b201de90ca865cf Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 16:42:25 +0300 Subject: [PATCH 24/31] fix --- ydb/core/tx/columnshard/engines/column_engine.cpp | 9 --------- ydb/core/tx/columnshard/engines/column_engine.h | 4 ---- .../reader/plain_reader/constructor/read_metadata.h | 8 +------- ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp | 1 - 4 files changed, 1 insertion(+), 21 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/column_engine.cpp b/ydb/core/tx/columnshard/engines/column_engine.cpp index 075409c88af2..a211abcdca3c 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine.cpp @@ -25,21 +25,12 @@ ui64 IColumnEngine::GetMetadataLimit() { } } -size_t TSelectInfo::NumChunks() const { - size_t records = 0; - for (auto& portionInfo : PortionsOrderedPK) { - records += portionInfo->NumChunks(); - } - return records; -} - TSelectInfo::TStats TSelectInfo::Stats() const { TStats out; out.Portions = PortionsOrderedPK.size(); THashSet uniqBlob; for (auto& portionInfo : PortionsOrderedPK) { - out.Records += portionInfo->NumChunks(); out.Rows += portionInfo->GetRecordsCount(); for (auto& blobId : portionInfo->GetBlobIds()) { out.Bytes += blobId.BlobSize(); diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 816fa2f9f8de..786d72e5c323 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -30,14 +30,12 @@ class TManager; struct TSelectInfo { struct TStats { size_t Portions{}; - size_t Records{}; size_t Blobs{}; size_t Rows{}; size_t Bytes{}; const TStats& operator+=(const TStats& stats) { Portions += stats.Portions; - Records += stats.Records; Blobs += stats.Blobs; Rows += stats.Rows; Bytes += stats.Bytes; @@ -47,8 +45,6 @@ struct TSelectInfo { std::vector> PortionsOrderedPK; - size_t NumChunks() const; - TStats Stats() const; void DebugStream(IOutputStream& out); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h index 5f5ad70db296..50befec8387d 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h @@ -145,11 +145,6 @@ struct TReadMetadata : public TReadMetadataBase { return SelectInfo->PortionsOrderedPK.empty() && CommittedBlobs.empty(); } - size_t NumIndexedChunks() const { - Y_ABORT_UNLESS(SelectInfo); - return SelectInfo->NumChunks(); - } - size_t NumIndexedBlobs() const { Y_ABORT_UNLESS(SelectInfo); return SelectInfo->Stats().Blobs; @@ -158,8 +153,7 @@ struct TReadMetadata : public TReadMetadataBase { std::unique_ptr StartScan(const std::shared_ptr& readContext) const override; void Dump(IOutputStream& out) const override { - out << " index chunks: " << NumIndexedChunks() - << " index blobs: " << NumIndexedBlobs() + out << " index blobs: " << NumIndexedBlobs() << " committed blobs: " << CommittedBlobs.size() // << " with program steps: " << (Program ? Program->Steps.size() : 0) << " at snapshot: " << GetRequestSnapshot().DebugString(); diff --git a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp index 43543e0e03d1..db31b3c2a766 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp @@ -497,7 +497,6 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { ui64 txId = 1; auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false), false); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK[0]->NumChunks(), columnIds.size() + TIndexInfo::GetSnapshotColumnIdsSet().size() - 1); } { // select another pathId From 2382bfef0103463bf8401e44ed25f22cd9e013d4 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 16:45:10 +0300 Subject: [PATCH 25/31] fix --- ydb/core/tx/columnshard/engines/portions/data_accessor.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.h b/ydb/core/tx/columnshard/engines/portions/data_accessor.h index 7677a066cc0a..ee183f01818f 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.h +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.h @@ -67,10 +67,6 @@ class TPortionDataAccessor { : PortionInfo(&portionInfo) { } - const std::vector& GetRecords() const { - return PortionInfo->Records; - } - ui64 GetColumnRawBytes(const std::set& entityIds, const bool validation = true) const; ui64 GetColumnBlobBytes(const std::set& entityIds, const bool validation = true) const; ui64 GetIndexRawBytes(const std::set& entityIds, const bool validation = true) const; From c805b2ab1f52aa9af2e540b312615848d1eaea56 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 17:51:16 +0300 Subject: [PATCH 26/31] close records and indexes in portion info --- .../engines/changes/general_compaction.cpp | 6 ++++-- .../columnshard/engines/portions/constructor.h | 4 ++-- .../engines/portions/data_accessor.h | 8 ++++++++ .../columnshard/engines/portions/portion_info.h | 17 ++--------------- .../reader/plain_reader/iterator/source.h | 4 ++-- 5 files changed, 18 insertions(+), 21 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp index 5b0d9bd7ec26..e1bf92ed4c69 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp @@ -116,7 +116,7 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks( dataColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); } if (dataColumnIds.size() != resultSchema->GetColumnsCount()) { - for (auto id : i.GetColumnIds()) { + for (auto id : TPortionDataAccessor(i).GetColumnIds()) { if (resultSchema->HasColumnId(id)) { dataColumnIds.emplace(id); } @@ -239,8 +239,9 @@ std::shared_ptr TGeneralCo ui64 TGeneralCompactColumnEngineChanges::TMemoryPredictorChunkedPolicy::AddPortion(const TPortionInfo& portionInfo) { SumMemoryFix += portionInfo.GetRecordsCount() * (2 * sizeof(ui64) + sizeof(ui32) + sizeof(ui16)) + portionInfo.GetTotalBlobBytes(); ++PortionsCount; - auto it = MaxMemoryByColumnChunk.begin(); SumMemoryDelta = 0; +/* + auto it = MaxMemoryByColumnChunk.begin(); const auto advanceIterator = [&](const ui32 columnId, const ui64 maxColumnChunkRawBytes) { while (it != MaxMemoryByColumnChunk.end() && it->ColumnId < columnId) { ++it; @@ -266,6 +267,7 @@ ui64 TGeneralCompactColumnEngineChanges::TMemoryPredictorChunkedPolicy::AddPorti } } advanceIterator(columnId, maxChunkSize); +*/ AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("memory_prediction_after", SumMemoryFix + SumMemoryDelta)( "portion_info", portionInfo.DebugString()); diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.h b/ydb/core/tx/columnshard/engines/portions/constructor.h index acd4652e1ca3..e349d172012b 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.h +++ b/ydb/core/tx/columnshard/engines/portions/constructor.h @@ -111,8 +111,8 @@ class TPortionInfoConstructor { MetaConstructor = TPortionMetaConstructor(portion.Meta); } if (withBlobs) { - Indexes = portion.GetIndexes(); - Records = portion.GetRecords(); + Indexes = portion.Indexes; + Records = portion.Records; BlobIds = portion.BlobIds; } } diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.h b/ydb/core/tx/columnshard/engines/portions/data_accessor.h index ee183f01818f..106ccae65999 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.h +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.h @@ -67,6 +67,14 @@ class TPortionDataAccessor { : PortionInfo(&portionInfo) { } + std::set GetColumnIds() const { + std::set result; + for (auto&& i : PortionInfo->Records) { + result.emplace(i.GetColumnId()); + } + return result; + } + ui64 GetColumnRawBytes(const std::set& entityIds, const bool validation = true) const; ui64 GetColumnBlobBytes(const std::set& entityIds, const bool validation = true) const; ui64 GetIndexRawBytes(const std::set& entityIds, const bool validation = true) const; diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 4e080283430f..e9c56e66eafb 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -81,17 +81,16 @@ class TPortionInfo { ui64 PathId = 0; ui64 PortionId = 0; // Id of independent (overlayed by PK) portion of data in pathId TSnapshot MinSnapshotDeprecated = TSnapshot::Zero(); // {PlanStep, TxId} is min snapshot for {Granule, Portion} - TSnapshot RemoveSnapshot = - TSnapshot::Zero(); // {XPlanStep, XTxId} is snapshot where the blob has been removed (i.e. compacted into another one) + TSnapshot RemoveSnapshot = TSnapshot::Zero(); std::optional SchemaVersion; std::optional ShardingVersion; TPortionMeta Meta; - YDB_READONLY_DEF(std::vector, Indexes); YDB_READONLY(TRuntimeFeatures, RuntimeFeatures, 0); std::vector BlobIds; TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TPortionInfo& proto); + std::vector Indexes; std::vector Records; public: @@ -230,10 +229,6 @@ class TPortionInfo { static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TPortionInfo& proto, const TIndexInfo& indexInfo); void SerializeToProto(NKikimrColumnShardDataSharingProto::TPortionInfo& proto) const; - const std::vector& GetRecords() const { - return Records; - } - ui64 GetPathId() const { return PathId; } @@ -287,14 +282,6 @@ class TPortionInfo { static constexpr const ui32 BLOB_BYTES_LIMIT = 8 * 1024 * 1024; - std::set GetColumnIds() const { - std::set result; - for (auto&& i : Records) { - result.emplace(i.GetColumnId()); - } - return result; - } - NArrow::NSplitter::TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { NArrow::NSplitter::TSerializationStats result; for (auto&& i : Records) { diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index d19c9e527e7a..fc17224633d6 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -288,7 +288,7 @@ class TPortionDataSource: public IDataSource { virtual NJson::TJsonValue DoDebugJsonForMemory() const override { NJson::TJsonValue result = TBase::DoDebugJsonForMemory(); - auto columns = Portion->GetColumnIds(); + auto columns = TPortionDataAccessor(*Portion).GetColumnIds(); for (auto&& i : SequentialEntityIds) { AFL_VERIFY(columns.erase(i)); } @@ -302,7 +302,7 @@ class TPortionDataSource: public IDataSource { result.InsertValue("portion_id", Portion->GetPortionId()); result.InsertValue("raw", Portion->GetTotalRawBytes()); result.InsertValue("blob", Portion->GetTotalBlobBytes()); - result.InsertValue("read_memory", GetColumnRawBytes(Portion->GetColumnIds())); + result.InsertValue("read_memory", GetColumnRawBytes(TPortionDataAccessor(*Portion).GetColumnIds())); return result; } virtual void DoAbort() override; From 6645bfc8073355f63b9ab0a0eb67190f3e5e6f4a Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 18:03:50 +0300 Subject: [PATCH 27/31] clean --- .../engines/portions/portion_info.cpp | 35 ------------------- .../engines/portions/portion_info.h | 1 - 2 files changed, 36 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 4a0c75e1851b..c54f8c22beaf 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -198,41 +198,6 @@ ISnapshotSchema::TPtr TPortionInfo::GetSchema(const TVersionedIndex& index) cons return index.GetSchema(MinSnapshotDeprecated); } -void TPortionInfo::ReorderChunks() { - { - auto pred = [](const TColumnRecord& l, const TColumnRecord& r) { - return l.GetAddress() < r.GetAddress(); - }; - std::sort(Records.begin(), Records.end(), pred); - std::optional chunk; - for (auto&& i : Records) { - if (!chunk) { - chunk = i.GetAddress(); - } else { - AFL_VERIFY(*chunk < i.GetAddress()); - chunk = i.GetAddress(); - } - AFL_VERIFY(chunk->GetEntityId()); - } - } - { - auto pred = [](const TIndexChunk& l, const TIndexChunk& r) { - return l.GetAddress() < r.GetAddress(); - }; - std::sort(Indexes.begin(), Indexes.end(), pred); - std::optional chunk; - for (auto&& i : Indexes) { - if (!chunk) { - chunk = i.GetAddress(); - } else { - AFL_VERIFY(*chunk < i.GetAddress()); - chunk = i.GetAddress(); - } - AFL_VERIFY(chunk->GetEntityId()); - } - } -} - void TPortionInfo::FullValidation() const { TPortionDataAccessor::CheckChunksOrder(Records); TPortionDataAccessor::CheckChunksOrder(Indexes); diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index e9c56e66eafb..95e6afe8887e 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -204,7 +204,6 @@ class TPortionInfo { } void FullValidation() const; - void ReorderChunks(); const TBlobRange RestoreBlobRange(const TBlobRangeLink16& linkRange) const { return linkRange.RestoreRange(GetBlobId(linkRange.GetBlobIdxVerified())); From 019c2767183e35e475d79fdda9b8920c3bb19cfa Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 18:13:54 +0300 Subject: [PATCH 28/31] fixes --- .../engines/changes/general_compaction.cpp | 2 +- .../engines/portions/data_accessor.h | 10 +++++ .../engines/portions/portion_info.cpp | 13 ++++++ .../engines/portions/portion_info.h | 40 +++++-------------- .../reader/sys_view/portions/portions.cpp | 11 +---- 5 files changed, 34 insertions(+), 42 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp index e1bf92ed4c69..010a3b0fb7b7 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp @@ -111,7 +111,7 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks( dataColumnIds = ISnapshotSchema::GetColumnsWithDifferentDefaults(schemas, resultSchema); } for (auto&& i : SwitchedPortions) { - stats->Merge(i.GetSerializationStat(*resultSchema)); + stats->Merge(TPortionDataAccessor(i).GetSerializationStat(*resultSchema)); if (i.GetMeta().GetDeletionsCount()) { dataColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); } diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.h b/ydb/core/tx/columnshard/engines/portions/data_accessor.h index 106ccae65999..aacf90a9a4ad 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.h +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.h @@ -75,6 +75,16 @@ class TPortionDataAccessor { return result; } + NArrow::NSplitter::TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { + NArrow::NSplitter::TSerializationStats result; + for (auto&& i : PortionInfo->Records) { + if (schema.GetFieldByColumnIdOptional(i.ColumnId)) { + result.AddStat(i.GetSerializationStat(schema.GetFieldByColumnIdVerified(i.ColumnId)->name())); + } + } + return result; + } + ui64 GetColumnRawBytes(const std::set& entityIds, const bool validation = true) const; ui64 GetColumnBlobBytes(const std::set& entityIds, const bool validation = true) const; ui64 GetIndexRawBytes(const std::set& entityIds, const bool validation = true) const; diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index c54f8c22beaf..3d21a5fd104a 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -253,12 +253,25 @@ void TPortionInfo::Precalculate() { { PrecalculatedColumnRawBytes = 0; PrecalculatedColumnBlobBytes = 0; + PrecalculatedRecordsCount = 0; const auto aggr = [&](const TColumnRecord& r) { PrecalculatedColumnRawBytes += r.GetMeta().GetRawBytes(); PrecalculatedColumnBlobBytes += r.BlobRange.GetSize(); + if (r.GetColumnId() == Records.front().GetColumnId()) { + PrecalculatedRecordsCount = r.GetMeta().GetRecordsCount(); + } }; TPortionDataAccessor::AggregateIndexChunksData(aggr, Records, nullptr, true); } + { + PrecalculatedIndexRawBytes = 0; + PrecalculatedIndexBlobBytes = 0; + const auto aggr = [&](const TIndexChunk& r) { + PrecalculatedIndexRawBytes += r.GetRawBytes(); + PrecalculatedIndexBlobBytes += r.GetDataSize(); + }; + TPortionDataAccessor::AggregateIndexChunksData(aggr, Indexes, nullptr, true); + } } } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 95e6afe8887e..97fe59e41c31 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -65,6 +65,9 @@ class TPortionInfo { ui64 PrecalculatedColumnRawBytes = 0; ui64 PrecalculatedColumnBlobBytes = 0; + ui64 PrecalculatedRecordsCount = 0; + ui64 PrecalculatedIndexBlobBytes = 0; + ui64 PrecalculatedIndexRawBytes = 0; bool Precalculated = false; void Precalculate(); @@ -281,16 +284,6 @@ class TPortionInfo { static constexpr const ui32 BLOB_BYTES_LIMIT = 8 * 1024 * 1024; - NArrow::NSplitter::TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { - NArrow::NSplitter::TSerializationStats result; - for (auto&& i : Records) { - if (schema.GetFieldByColumnIdOptional(i.ColumnId)) { - result.AddStat(i.GetSerializationStat(schema.GetFieldByColumnIdVerified(i.ColumnId)->name())); - } - } - return result; - } - const TPortionMeta& GetMeta() const { return Meta; } @@ -435,33 +428,18 @@ class TPortionInfo { ISnapshotSchema::TPtr GetSchema(const TVersionedIndex& index) const; ui32 GetRecordsCount() const { - ui32 result = 0; - std::optional columnIdFirst; - for (auto&& i : Records) { - if (!columnIdFirst || *columnIdFirst == i.ColumnId) { - result += i.GetMeta().GetRecordsCount(); - columnIdFirst = i.ColumnId; - } else { - break; - } - } - return result; + AFL_VERIFY(Precalculated); + return PrecalculatedRecordsCount; } ui64 GetIndexBlobBytes() const noexcept { - ui64 sum = 0; - for (const auto& rec : Indexes) { - sum += rec.GetDataSize(); - } - return sum; + AFL_VERIFY(Precalculated); + return PrecalculatedIndexBlobBytes; } ui64 GetIndexRawBytes() const noexcept { - ui64 sum = 0; - for (const auto& rec : Indexes) { - sum += rec.GetRawBytes(); - } - return sum; + AFL_VERIFY(Precalculated); + return PrecalculatedIndexRawBytes; } ui64 GetColumnRawBytes() const; diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp index 8bce9a6bfee4..ea4fafa737cf 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp @@ -20,16 +20,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[10], arrow::util::string_view(tierName.data(), tierName.size())); - NJson::TJsonValue statReport = NJson::JSON_ARRAY; - for (auto&& i : portion.GetIndexes()) { - if (!i.HasBlobData()) { - continue; - } - auto schema = portion.GetSchema(ReadMetadata->GetIndexVersions()); - auto indexMeta = schema->GetIndexInfo().GetIndexVerified(i.GetEntityId()); - statReport.AppendValue(indexMeta->SerializeDataToJson(i, schema->GetIndexInfo())); - } - auto statInfo = statReport.GetStringRobust(); + const TString statInfo = Default(); NArrow::Append(*builders[11], arrow::util::string_view(statInfo.data(), statInfo.size())); NArrow::Append(*builders[12], portion.HasRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized)); From e755064526bcd589258e0ec86f2f5059ec662870 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 18:22:37 +0300 Subject: [PATCH 29/31] fix --- .../tx/columnshard/data_sharing/source/session/cursor.cpp | 4 ++-- .../engines/reader/plain_reader/iterator/source.cpp | 2 +- .../tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp index 93c2ff4ca19d..6f24fa0bf129 100644 --- a/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp +++ b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp @@ -26,8 +26,8 @@ void TSourceCursor::BuildSelection(const std::shared_ptr& stor NextPortionId = itPortion->first; } else { portions.emplace_back(*itPortion->second); - chunksCount += portions.back().GetRecords().size(); - chunksCount += portions.back().GetIndexes().size(); + chunksCount += TPortionDataAccessor(portions.back()).GetRecords().size(); + chunksCount += TPortionDataAccessor(portions.back()).GetIndexes().size(); ++count; } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index ca8e7763016c..6fd823a5c889 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -119,7 +119,7 @@ bool TPortionDataSource::DoStartFetchingIndexes( TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); { std::set indexIds; - for (auto&& i : Portion->GetIndexes()) { + for (auto&& i : TPortionDataAccessor(*Portion).GetIndexes()) { if (!indexes->GetIndexIdsSet().contains(i.GetIndexId())) { continue; } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp index a70a489fd53b..c7ec07c26e9b 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -80,7 +80,7 @@ void TStatsIterator::AppendStats(const std::vector indexes; - for (auto&& r : portion.GetIndexes()) { + for (auto&& r : TPortionDataAccessor(portion).GetIndexes()) { indexes.emplace_back(&r); } if (Reverse) { From 33d9b02e862ac9da1acbb483613e866395c1a4b1 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 18:30:48 +0300 Subject: [PATCH 30/31] correction --- .../transaction/tx_blobs_written.cpp | 2 +- .../engines/portions/data_accessor.cpp | 46 +++++++++++++++++++ .../engines/portions/data_accessor.h | 39 +++++++++------- .../engines/portions/portion_info.cpp | 46 ------------------- .../engines/portions/portion_info.h | 6 --- .../engines/storage/granule/granule.h | 6 +-- 6 files changed, 72 insertions(+), 73 deletions(-) diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.cpp index 20f26e5b1986..dab6e021fc84 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_blobs_written.cpp @@ -34,7 +34,7 @@ bool TTxBlobsWritingFinished::DoExecute(TTransactionContext& txc, const TActorCo if (operation->GetBehaviour() == EOperationBehaviour::NoTxWrite) { granule.CommitImmediateOnExecute(txc, *CommitSnapshot, portion.GetPortionInfo()); } else { - granule.InsertPortionOnExecute(txc, portion.GetPortionInfo()); + granule.InsertPortionOnExecute(txc, TPortionDataAccessor(*portion.GetPortionInfo())); } } } diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp index 6fb17478ab6c..f515f7e4212f 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.cpp @@ -494,6 +494,52 @@ ui64 TPortionDataAccessor::GetMinMemoryForReadColumns(const std::optionalRecords) { + db.WriteColumn(*this, record, firstPKColumnId); + } + for (auto& record : PortionInfo->Indexes) { + db.WriteIndex(*this, record); + } + } +} + +void TPortionDataAccessor::RemoveFromDatabase(IDbWrapper& db) const { + db.ErasePortion(*PortionInfo); + for (auto& record : PortionInfo->Records) { + db.EraseColumn(*this, record); + } + for (auto& record : PortionInfo->Indexes) { + db.EraseIndex(*this, record); + } +} + +void TPortionDataAccessor::FullValidation() const { + CheckChunksOrder(PortionInfo->Records); + CheckChunksOrder(PortionInfo->Indexes); + AFL_VERIFY(PathId); + AFL_VERIFY(PortionId); + AFL_VERIFY(MinSnapshotDeprecated.Valid()); + std::set blobIdxs; + for (auto&& i : PortionInfo->Records) { + blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); + } + for (auto&& i : PortionInfo->Indexes) { + if (auto bRange = i.GetBlobRangeOptional()) { + blobIdxs.emplace(bRange->GetBlobIdxVerified()); + } + } + if (BlobIds.size()) { + AFL_VERIFY(BlobIds.size() == blobIdxs.size()); + AFL_VERIFY(BlobIds.size() == *blobIdxs.rbegin() + 1); + } else { + AFL_VERIFY(blobIdxs.empty()); + } +} + TConclusion> TPortionDataAccessor::TPreparedColumn::AssembleAccessor() const { Y_ABORT_UNLESS(!Blobs.empty()); diff --git a/ydb/core/tx/columnshard/engines/portions/data_accessor.h b/ydb/core/tx/columnshard/engines/portions/data_accessor.h index aacf90a9a4ad..aef5c151a32d 100644 --- a/ydb/core/tx/columnshard/engines/portions/data_accessor.h +++ b/ydb/core/tx/columnshard/engines/portions/data_accessor.h @@ -17,6 +17,25 @@ class TPortionDataAccessor { private: const TPortionInfo* PortionInfo; + template + static void CheckChunksOrder(const std::vector& chunks) { + ui32 entityId = 0; + ui32 chunkIdx = 0; + for (auto&& i : chunks) { + if (entityId != i.GetEntityId()) { + AFL_VERIFY(entityId < i.GetEntityId()); + AFL_VERIFY(i.GetChunkIdx() == 0); + entityId = i.GetEntityId(); + chunkIdx = 0; + } else { + AFL_VERIFY(i.GetChunkIdx() == chunkIdx + 1); + chunkIdx = i.GetChunkIdx(); + } + } + } + + void FullValidation() const; + public: template static void AggregateIndexChunksData( @@ -46,23 +65,6 @@ class TPortionDataAccessor { } } - template - static void CheckChunksOrder(const std::vector& chunks) { - ui32 entityId = 0; - ui32 chunkIdx = 0; - for (auto&& i : chunks) { - if (entityId != i.GetEntityId()) { - AFL_VERIFY(entityId < i.GetEntityId()); - AFL_VERIFY(i.GetChunkIdx() == 0); - entityId = i.GetEntityId(); - chunkIdx = 0; - } else { - AFL_VERIFY(i.GetChunkIdx() == chunkIdx + 1); - chunkIdx = i.GetChunkIdx(); - } - } - } - TPortionDataAccessor(const TPortionInfo& portionInfo) : PortionInfo(&portionInfo) { } @@ -75,6 +77,9 @@ class TPortionDataAccessor { return result; } + void RemoveFromDatabase(IDbWrapper& db) const; + void SaveToDatabase(IDbWrapper& db, const ui32 firstPKColumnId, const bool saveOnlyMeta) const; + NArrow::NSplitter::TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { NArrow::NSplitter::TSerializationStats result; for (auto&& i : PortionInfo->Records) { diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 3d21a5fd104a..d8b07f7773aa 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -57,29 +57,6 @@ TString TPortionInfo::DebugString(const bool withDetails) const { return sb << ")"; } -void TPortionInfo::RemoveFromDatabase(IDbWrapper& db) const { - db.ErasePortion(*this); - for (auto& record : Records) { - db.EraseColumn(*this, record); - } - for (auto& record : Indexes) { - db.EraseIndex(*this, record); - } -} - -void TPortionInfo::SaveToDatabase(IDbWrapper& db, const ui32 firstPKColumnId, const bool saveOnlyMeta) const { - FullValidation(); - db.WritePortion(*this); - if (!saveOnlyMeta) { - for (auto& record : Records) { - db.WriteColumn(*this, record, firstPKColumnId); - } - for (auto& record : Indexes) { - db.WriteIndex(*this, record); - } - } -} - ui64 TPortionInfo::GetMetadataMemorySize() const { return sizeof(TPortionInfo) + Records.size() * (sizeof(TColumnRecord) + 8) + Indexes.size() * sizeof(TIndexChunk) + BlobIds.size() * sizeof(TUnifiedBlobId) - sizeof(TPortionMeta) + Meta.GetMetadataMemorySize(); @@ -198,29 +175,6 @@ ISnapshotSchema::TPtr TPortionInfo::GetSchema(const TVersionedIndex& index) cons return index.GetSchema(MinSnapshotDeprecated); } -void TPortionInfo::FullValidation() const { - TPortionDataAccessor::CheckChunksOrder(Records); - TPortionDataAccessor::CheckChunksOrder(Indexes); - AFL_VERIFY(PathId); - AFL_VERIFY(PortionId); - AFL_VERIFY(MinSnapshotDeprecated.Valid()); - std::set blobIdxs; - for (auto&& i : Records) { - blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); - } - for (auto&& i : Indexes) { - if (auto bRange = i.GetBlobRangeOptional()) { - blobIdxs.emplace(bRange->GetBlobIdxVerified()); - } - } - if (BlobIds.size()) { - AFL_VERIFY(BlobIds.size() == blobIdxs.size()); - AFL_VERIFY(BlobIds.size() == *blobIdxs.rbegin() + 1); - } else { - AFL_VERIFY(blobIdxs.empty()); - } -} - ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoConstructor& portion) { if (!CurrentSchema || portion.GetMinSnapshotDeprecatedVerified() != LastSnapshot) { CurrentSchema = portion.GetSchema(VersionedIndex); diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 97fe59e41c31..ee8d4b400ad6 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -206,8 +206,6 @@ class TPortionInfo { return (RuntimeFeatures & (TRuntimeFeatures)feature); } - void FullValidation() const; - const TBlobRange RestoreBlobRange(const TBlobRangeLink16& linkRange) const { return linkRange.RestoreRange(GetBlobId(linkRange.GetBlobIdxVerified())); } @@ -235,10 +233,6 @@ class TPortionInfo { return PathId; } - void RemoveFromDatabase(IDbWrapper& db) const; - - void SaveToDatabase(IDbWrapper& db, const ui32 firstPKColumnId, const bool saveOnlyMeta) const; - bool OlderThen(const TPortionInfo& info) const { return RecordSnapshotMin() < info.RecordSnapshotMin(); } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.h b/ydb/core/tx/columnshard/engines/storage/granule/granule.h index 0d21f30dabcb..65c0d6eaf177 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.h @@ -160,7 +160,7 @@ class TGranuleMeta: TNonCopyable { auto copy = *portion; modifier(copy); TDbWrapper wrapper(db, nullptr); - copy.SaveToDatabase(wrapper, 0, true); + TPortionDataAccessor(copy).SaveToDatabase(wrapper, 0, true); } template @@ -172,10 +172,10 @@ class TGranuleMeta: TNonCopyable { } void InsertPortionOnExecute( - NTabletFlatExecutor::TTransactionContext& txc, const std::shared_ptr& portion) const { + NTabletFlatExecutor::TTransactionContext& txc, const TPortionDataAccessor& portion) const { AFL_VERIFY(!InsertedPortions.contains(portion->GetInsertWriteIdVerified())); TDbWrapper wrapper(txc.DB, nullptr); - portion->SaveToDatabase(wrapper, 0, false); + portion.SaveToDatabase(wrapper, 0, false); } void InsertPortionOnComplete(const std::shared_ptr& portion) { From 59f18af4f605f3d1a29d2ab5b92cace3930fd592 Mon Sep 17 00:00:00 2001 From: ivanmorozov333 Date: Mon, 28 Oct 2024 18:32:34 +0300 Subject: [PATCH 31/31] fix --- ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp | 2 +- ydb/core/tx/columnshard/engines/storage/granule/granule.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp index 0c1e5fb77548..43f85b178019 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp @@ -24,7 +24,7 @@ void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TC } THashMap> blobIdsByStorage; for (auto&& p : PortionsToDrop) { - p.RemoveFromDatabase(context.DBWrapper); + TPortionDataAccessor(p).RemoveFromDatabase(context.DBWrapper); TPortionDataAccessor(p).FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); pathIds.emplace(p.GetPathId()); } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.h b/ydb/core/tx/columnshard/engines/storage/granule/granule.h index 65c0d6eaf177..f13745e3cf3c 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.h @@ -198,7 +198,7 @@ class TGranuleMeta: TNonCopyable { auto it = InsertedPortions.find(insertWriteId); AFL_VERIFY(it != InsertedPortions.end()); TDbWrapper wrapper(txc.DB, nullptr); - it->second->RemoveFromDatabase(wrapper); + TPortionDataAccessor(*it->second).RemoveFromDatabase(wrapper); } void AbortPortionOnComplete(const TInsertWriteId insertWriteId) {