diff --git a/ydb/core/tx/columnshard/blob.cpp b/ydb/core/tx/columnshard/blob.cpp index b32cff462eb7..ec92f317f225 100644 --- a/ydb/core/tx/columnshard/blob.cpp +++ b/ydb/core/tx/columnshard/blob.cpp @@ -1,218 +1 @@ #include "blob.h" -#include "defs.h" -#include - -#include - -namespace NKikimr::NOlap { - -// Format: "S3-f(logoBlobId)-group" -// Example: "S3-42-72075186224038245_51_31595_2_0_11952_0-2181038103" -TString DsIdToS3Key(const TUnifiedBlobId& dsid, const ui64 pathId) { - TString blobId = dsid.GetLogoBlobId().ToString(); - for (auto&& c : blobId) { - switch (c) { - case ':': - c = '_'; - break; - case '[': - case ']': - c = '-'; - } - } - TString result = - "S3-" + - ::ToString(pathId) + - blobId + - ::ToString(dsid.GetDsGroup()) - ; - return result; -} - -TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error, ui64& pathId) { - TVector keyBucket; - Split(s, "-", keyBucket); - - ui32 dsGroup; - if (keyBucket.size() != 4 || keyBucket[0] != "S3" - || !TryFromString(keyBucket[3], dsGroup) - || !TryFromString(keyBucket[1], pathId)) - { - error = TStringBuilder() << "Wrong S3 key '" << s << "'"; - return TUnifiedBlobId(); - } - - TString blobId = "[" + keyBucket[2] + "]"; - for (size_t i = 0; i < blobId.size(); ++i) { - switch (blobId[i]) { - case '_': - blobId[i] = ':'; - break; - } - } - - TLogoBlobID logoBlobId; - if (!TLogoBlobID::Parse(logoBlobId, blobId, error)) { - return TUnifiedBlobId(); - } - - return TUnifiedBlobId(dsGroup, logoBlobId); -} - -namespace { - -#define PARSE_INT_COMPONENT(fieldType, fieldName, endChar) \ - if (pos >= endPos) { \ - error = "Failed to parse " #fieldName " component"; \ - return TUnifiedBlobId(); \ - } \ - fieldType fieldName = -1; \ - { \ - auto [ptr, ec] { std::from_chars(str + pos, str + endPos, fieldName) }; \ - if (ec != std::errc()) { \ - error = "Failed to parse " #fieldName " component"; \ - return TUnifiedBlobId(); \ - } else { \ - pos = ptr - str; \ - } \ - if (str[pos++] != endChar) { \ - error = #endChar " not found after " #fieldName; \ - return TUnifiedBlobId(); \ - } \ - } - -// Format: "DS:group:logoBlobId" -// Example: "DS:2181038103:[72075186224038245:51:31595:2:0:11952:0]" -TUnifiedBlobId ParseExtendedDsBlobId(const TString& s, TString& error) { - Y_ABORT_UNLESS(s.size() > 2); - const char* str = s.c_str(); - Y_ABORT_UNLESS(str[0] == 'D' && str[1] == 'S'); - i64 pos = 2; - i64 endPos = s.size(); - if (str[pos++] != ':') { - error = "Starting ':' not found"; - return TUnifiedBlobId(); - } - - PARSE_INT_COMPONENT(ui32, dsGroup, ':'); - - TLogoBlobID logoBlobId; - if (!TLogoBlobID::Parse(logoBlobId, s.substr(pos), error)) { - return TUnifiedBlobId(); - } - - return TUnifiedBlobId(dsGroup, logoBlobId); -} - -// Format: "SM[tabletId:generation:step:cookie:size]" -// Example: "SM[72075186224038245:51:31184:0:2528]" -TUnifiedBlobId ParseSmallBlobId(const TString& s, TString& error) { - Y_ABORT_UNLESS(s.size() > 2); - const char* str = s.c_str(); - Y_ABORT_UNLESS(str[0] == 'S' && str[1] == 'M'); - i64 pos = 2; - i64 endPos = s.size(); - if (str[pos++] != '[') { - error = "opening [ not found"; - return TUnifiedBlobId(); - } - - PARSE_INT_COMPONENT(ui64, tabletId, ':'); - PARSE_INT_COMPONENT(ui32, gen, ':'); - PARSE_INT_COMPONENT(ui32, step, ':'); - PARSE_INT_COMPONENT(ui32, cookie, ':'); - PARSE_INT_COMPONENT(ui32, size, ']'); - - if (pos != endPos) { - error = "Extra characters after closing ]"; - return TUnifiedBlobId(); - } - - return TUnifiedBlobId(tabletId, gen, step, cookie, size); -} - -// Format: "s = S3_key" -TUnifiedBlobId ParseS3BlobId(const TString& s, TString& error) { - ui64 pathId; - TUnifiedBlobId dsBlobId = S3KeyToDsId(s, error, pathId); - if (!dsBlobId.IsValid()) { - return TUnifiedBlobId(); - } - - return TUnifiedBlobId(dsBlobId, TUnifiedBlobId::S3_BLOB, pathId); -} - -} - -TUnifiedBlobId TUnifiedBlobId::ParseFromString(const TString& str, - const IBlobGroupSelector* dsGroupSelector, TString& error) -{ - if (str.size() <= 2) { - error = TStringBuilder() << "Wrong blob id: '" << str << "'"; - return TUnifiedBlobId(); - } - - if (str[0] == '[') { - // If blobId starts with '[' this must be a logoblobId and if channel is set to FAKE_CHANNEL - // this is a fake logoblobid used for small blob - TLogoBlobID logoBlobId; - bool parsed = TLogoBlobID::Parse(logoBlobId, str, error); - if (!parsed) { - error = "Cannot parse TLogoBlobID: " + error; - return TUnifiedBlobId(); - } - if (logoBlobId.Channel() == TSmallBlobId::FAKE_CHANNEL) { - // Small blob - return TUnifiedBlobId(logoBlobId.TabletID(), logoBlobId.Generation(), logoBlobId.Step(), - logoBlobId.Cookie(), logoBlobId.BlobSize()); - } else { - // DS blob - if (!dsGroupSelector) { - error = "Need TBlobGroupSelector to resolve DS group for the blob"; - return TUnifiedBlobId(); - } - return TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); - } - } else if (str[0] == 'D' && str[1] == 'S') { - return ParseExtendedDsBlobId(str, error); - } else if (str[0] == 'S' && str[1] == 'M') { - return ParseSmallBlobId(str, error); - } else if (str[0] == 'S' && str[1] == '3') { - return ParseS3BlobId(str, error); - } - - error = TStringBuilder() << "Wrong blob id: '" << str << "'"; - return TUnifiedBlobId(); -} - -NKikimr::TConclusionStatus TBlobRange::DeserializeFromProto(const NKikimrColumnShardProto::TBlobRange& proto) { - auto parsed = TUnifiedBlobId::BuildFromString(proto.GetBlobId(), nullptr); - if (!parsed) { - return parsed; - } - BlobId = parsed.DetachResult(); - - Offset = proto.GetOffset(); - Size = proto.GetSize(); - return TConclusionStatus::Success(); -} - -NKikimr::TConclusion TBlobRange::BuildFromProto(const NKikimrColumnShardProto::TBlobRange& proto) { - TBlobRange result; - auto parsed = result.DeserializeFromProto(proto); - if (!parsed) { - return parsed; - } else { - return result; - } -} - -NKikimrColumnShardProto::TBlobRange TBlobRange::SerializeToProto() const { - NKikimrColumnShardProto::TBlobRange result; - result.SetBlobId(BlobId.ToStringNew()); - result.SetOffset(Offset); - result.SetSize(Size); - return result; -} - -} diff --git a/ydb/core/tx/columnshard/blob.h b/ydb/core/tx/columnshard/blob.h index 697cae299a09..4a2dad81a540 100644 --- a/ydb/core/tx/columnshard/blob.h +++ b/ydb/core/tx/columnshard/blob.h @@ -1,468 +1,3 @@ #pragma once -#include -#include - -#include - -namespace NKikimrColumnShardProto { -class TBlobRange; -} - -namespace NKikimr::NOlap { - -class IBlobGroupSelector; -class TUnifiedBlobId; - -TString DsIdToS3Key(const TUnifiedBlobId& dsid, const ui64 pathId); -TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error, ui64& pathId); - -// Encapsulates different types of blob ids to simplify dealing with blobs for the -// components that do not need to know where the blob is stored -// Blob id formats: -// * Old DS blob id: just "logoBlobId" e.g. "[72075186224038245:51:31595:2:0:11952:0]" -// * DS blob id: "DS:dsGroup:logoBlobId" e.g. "DS:2181038103:[72075186224038245:51:31595:2:0:11952:0]" -// * Small blob id: "SM[tabletId:generation:step:cookie:size]" e.g. "SM[72075186224038245:51:31184:0:2528]" -class TUnifiedBlobId { - struct TInvalid { - bool operator == (const TInvalid&) const { return true; } - }; - - // Id of a blob in YDB distributed storage - struct TDsBlobId { - TLogoBlobID BlobId; - ui32 DsGroup; - - bool operator == (const TDsBlobId& other) const { - return BlobId == other.BlobId && DsGroup == other.DsGroup; - } - - TString ToStringNew() const { - return Sprintf( "DS:%" PRIu32 ":%s", DsGroup, BlobId.ToString().c_str()); - } - - TString ToStringLegacy() const { - return BlobId.ToString(); - } - - ui64 Hash() const { - return CombineHashes(BlobId.Hash(), IntHash(DsGroup)); - } - }; - - // Id of a blob that is stored in Tablet local DB table - struct TSmallBlobId { - static constexpr ui8 FAKE_CHANNEL = 255; // Small blob id can be represented as - // a fake TLogoBlobID with channel = FAKE_CHANNEL - - ui64 TabletId; - ui32 Gen; - ui32 Step; - ui32 Cookie; - ui32 Size; - - bool operator == (const TSmallBlobId& other) const { - return TabletId == other.TabletId && - Gen == other.Gen && - Step == other.Step && - Cookie == other.Cookie && - Size == other.Size; - } - - TString ToStringNew() const { - return Sprintf( "SM[%" PRIu64 ":%" PRIu32 ":%" PRIu32 ":%" PRIu32 ":%" PRIu32 "]", - TabletId, Gen, Step, Cookie, Size); - } - - TString ToStringLegacy() const { - // For compatibility with preproduction version small blobs can also be - // addressed by fake TlogoBlobID with channel = 255 - return TLogoBlobID(TabletId, Gen, Step, FAKE_CHANNEL, Size, Cookie).ToString(); - } - - ui64 Hash() const { - ui64 hash = IntHash(TabletId); - hash = CombineHashes(hash, IntHash(Gen)); - hash = CombineHashes(hash, IntHash(Step)); - hash = CombineHashes(hash, IntHash(Cookie)); - hash = CombineHashes(hash, IntHash(Size)); - return hash; - } - }; - - struct TS3BlobId { - TDsBlobId DsBlobId; - TString Key; - - TS3BlobId() = default; - - TS3BlobId(const TUnifiedBlobId& dsBlob, const ui64 pathId) - { - Y_ABORT_UNLESS(dsBlob.IsDsBlob()); - DsBlobId = std::get(dsBlob.Id); - Key = DsIdToS3Key(dsBlob, pathId); - } - - bool operator == (const TS3BlobId& other) const { - return Key == other.Key; - } - - TString ToStringNew() const { - return Sprintf("%s", Key.c_str()); - } - - ui64 Hash() const { - return IntHash(THash()(Key)); - } - }; - - std::variant< - TInvalid, - TDsBlobId, - TSmallBlobId, - TS3BlobId - > Id; - -public: - enum EBlobType { - INVALID = 0, - DS_BLOB = 1, - TABLET_SMALL_BLOB = 2, - S3_BLOB = 3, - }; - - TUnifiedBlobId() - : Id(TInvalid()) - {} - - // Initialize as DS blob Id - TUnifiedBlobId(ui32 dsGroup, const TLogoBlobID& logoBlobId) - : Id(TDsBlobId{logoBlobId, dsGroup}) - {} - - // Initialize as Small blob Id - TUnifiedBlobId(ui64 tabletId, ui32 gen, ui32 step, ui32 cookie, ui32 size) - : Id(TSmallBlobId{tabletId, gen, step, cookie, size}) - {} - - // Make S3 blob Id from DS one - TUnifiedBlobId(const TUnifiedBlobId& blob, EBlobType type, const ui64 pathId) - : Id(TS3BlobId(blob, pathId)) - { - Y_ABORT_UNLESS(type == S3_BLOB); - } - - TUnifiedBlobId(const TUnifiedBlobId& other) = default; - TUnifiedBlobId& operator = (const TUnifiedBlobId& logoBlobId) = default; - TUnifiedBlobId(TUnifiedBlobId&& other) = default; - TUnifiedBlobId& operator = (TUnifiedBlobId&& logoBlobId) = default; - - static TConclusion BuildFromString(const TString& id, const IBlobGroupSelector* dsGroupSelector) { - TString error; - TUnifiedBlobId result = ParseFromString(id, dsGroupSelector, error); - if (!result.IsValid()) { - return TConclusionStatus::Fail(error); - } - return result; - } - - TUnifiedBlobId MakeS3BlobId(ui64 pathId) const { - Y_ABORT_UNLESS(IsDsBlob()); - return TUnifiedBlobId(*this, TUnifiedBlobId::S3_BLOB, pathId); - } - - static TUnifiedBlobId ParseFromString(const TString& str, - const IBlobGroupSelector* dsGroupSelector, TString& error); - - bool operator == (const TUnifiedBlobId& other) const { - return Id == other.Id; - } - - EBlobType GetType() const { - return (EBlobType)Id.index(); - } - - bool IsValid() const { - return Id.index() != INVALID; - } - - size_t BlobSize() const { - switch (Id.index()) { - case DS_BLOB: - return std::get(Id).BlobId.BlobSize(); - case TABLET_SMALL_BLOB: - return std::get(Id).Size; - case S3_BLOB: - return std::get(Id).DsBlobId.BlobId.BlobSize(); - case INVALID: - Y_ABORT("Invalid blob id"); - } - Y_ABORT(); - } - - bool IsSmallBlob() const { - return GetType() == TABLET_SMALL_BLOB; - } - - bool IsDsBlob() const { - return GetType() == DS_BLOB; - } - - bool IsS3Blob() const { - return GetType() == S3_BLOB; - } - - TLogoBlobID GetLogoBlobId() const { - Y_ABORT_UNLESS(IsDsBlob()); - return std::get(Id).BlobId; - } - - ui32 GetDsGroup() const { - Y_ABORT_UNLESS(IsDsBlob()); - return std::get(Id).DsGroup; - } - - TString GetS3Key() const { - Y_ABORT_UNLESS(IsS3Blob()); - return std::get(Id).Key; - } - - ui64 GetTabletId() const { - switch (Id.index()) { - case DS_BLOB: - return std::get(Id).BlobId.TabletID(); - case TABLET_SMALL_BLOB: - return std::get(Id).TabletId; - case S3_BLOB: - return std::get(Id).DsBlobId.BlobId.TabletID(); - case INVALID: - Y_ABORT("Invalid blob id"); - } - Y_ABORT(); - } - - ui64 Hash() const noexcept { - switch (Id.index()) { - case INVALID: - return 0; - case DS_BLOB: - return std::get(Id).Hash(); - case TABLET_SMALL_BLOB: - return std::get(Id).Hash(); - case S3_BLOB: - return std::get(Id).Hash(); - } - Y_ABORT(); - } - - // This is only implemented for DS for backward compatibility with persisted data. - // All new functionality should rahter use string blob id representation - TString SerializeBinary() const { - Y_ABORT_UNLESS(IsDsBlob()); - return TString((const char*)GetLogoBlobId().GetRaw(), sizeof(TLogoBlobID)); - } - - TString ToStringLegacy() const { - switch (Id.index()) { - case DS_BLOB: - return std::get(Id).ToStringLegacy(); - case TABLET_SMALL_BLOB: - return std::get(Id).ToStringLegacy(); - case S3_BLOB: - Y_ABORT("Not implemented"); - case INVALID: - return ""; - } - Y_ABORT(); - } - - TString ToStringNew() const { - switch (Id.index()) { - case DS_BLOB: - return std::get(Id).ToStringNew(); - case TABLET_SMALL_BLOB: - return std::get(Id).ToStringNew(); - case S3_BLOB: - return std::get(Id).ToStringNew(); - case INVALID: - return ""; - } - Y_ABORT(); - } -}; - - -// Describes a range of bytes in a blob. It is used for read requests and for caching. -struct TBlobRange { - TUnifiedBlobId BlobId; - ui32 Offset; - ui32 Size; - - const TUnifiedBlobId& GetBlobId() const { - return BlobId; - } - - bool IsValid() const { - return BlobId.IsValid() && Size && Offset + Size <= BlobId.BlobSize(); - } - - ui32 GetBlobSize() const { - return Size; - } - - bool IsFullBlob() const { - return Size == BlobId.BlobSize(); - } - - explicit TBlobRange(const TUnifiedBlobId& blobId = TUnifiedBlobId(), ui32 offset = 0, ui32 size = 0) - : BlobId(blobId) - , Offset(offset) - , Size(size) - { - if (Size > 0) { - Y_ABORT_UNLESS(Offset < BlobId.BlobSize()); - Y_ABORT_UNLESS(Offset + Size <= BlobId.BlobSize()); - } - } - - static TBlobRange FromBlobId(const TUnifiedBlobId& blobId) { - return TBlobRange(blobId, 0, blobId.BlobSize()); - } - - bool operator == (const TBlobRange& other) const { - return - BlobId == other.BlobId && - Offset == other.Offset && - Size == other.Size; - } - - ui64 Hash() const noexcept { - ui64 hash = BlobId.Hash(); - hash = CombineHashes(hash, IntHash(Offset)); - hash = CombineHashes(hash, IntHash(Size)); - return hash; - } - - TString ToString() const { - return Sprintf("{ Blob: %s Offset: %" PRIu32 " Size: %" PRIu32 " }", - BlobId.ToStringNew().c_str(), Offset, Size); - } - - NKikimrColumnShardProto::TBlobRange SerializeToProto() const; - - TConclusionStatus DeserializeFromProto(const NKikimrColumnShardProto::TBlobRange& proto); - - static TConclusion BuildFromProto(const NKikimrColumnShardProto::TBlobRange& proto); -}; - -class IBlobInUseTracker { -private: - virtual bool DoFreeBlob(const NOlap::TUnifiedBlobId& blobId) = 0; - virtual bool DoUseBlob(const NOlap::TUnifiedBlobId& blobId) = 0; -public: - virtual ~IBlobInUseTracker() = default; - - bool FreeBlob(const NOlap::TUnifiedBlobId& blobId) { - return DoFreeBlob(blobId); - } - bool UseBlob(const NOlap::TUnifiedBlobId& blobId) { - return DoUseBlob(blobId); - } - - virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& blobId) const = 0; -}; - -// Expected blob lifecycle: EVICTING -> SELF_CACHED -> EXTERN <-> CACHED -enum class EEvictState : ui8 { - UNKNOWN = 0, - EVICTING = 1, // source, extern, cached blobs: 1-- - SELF_CACHED = 2, // source, extern, cached blobs: 11- - EXTERN = 3, // source, extern, cached blobs: -1- - CACHED = 4, // source, extern, cached blobs: -11 - ERASING = 5, // source, extern, cached blobs: -?? - //ERASED = 6, // source, extern, cached blobs: --- -}; - -inline bool IsExported(EEvictState state) { - return state == EEvictState::SELF_CACHED || - state == EEvictState::EXTERN || - state == EEvictState::CACHED; -} - -inline bool CouldBeExported(EEvictState state) { - return state == EEvictState::SELF_CACHED || - state == EEvictState::EXTERN || - state == EEvictState::CACHED || - state == EEvictState::ERASING; -} - -inline bool IsDeleted(EEvictState state) { - return ui8(state) >= ui8(EEvictState::EXTERN); // !EVICTING and !SELF_CACHED -} - -struct TEvictedBlob { - EEvictState State = EEvictState::UNKNOWN; - TUnifiedBlobId Blob; - TUnifiedBlobId ExternBlob; - TUnifiedBlobId CachedBlob; - - bool operator == (const TEvictedBlob& other) const { - return Blob == other.Blob; - } - - ui64 Hash() const noexcept { - return Blob.Hash(); - } - - bool IsEvicting() const { - return State == EEvictState::EVICTING; - } - - bool IsExternal() const { - if (State == EEvictState::EXTERN) { - Y_ABORT_UNLESS(ExternBlob.IsValid()); - return true; - } - return false; - } - - TString ToString() const { - return TStringBuilder() << "state: " << (ui32)State - << " blob: " << Blob.ToStringNew() - << " extern: " << ExternBlob.ToStringNew() - << " cached: " << CachedBlob.ToStringNew(); - } -}; - -} - -inline -IOutputStream& operator <<(IOutputStream& out, const NKikimr::NOlap::TUnifiedBlobId& blobId) { - return out << blobId.ToStringNew(); -} - -inline -IOutputStream& operator <<(IOutputStream& out, const NKikimr::NOlap::TBlobRange& blobRange) { - return out << blobRange.ToString(); -} - -template<> -struct ::THash { - inline ui64 operator()(const NKikimr::NOlap::TUnifiedBlobId& a) const { - return a.Hash(); - } -}; - -template <> -struct THash { - inline size_t operator() (const NKikimr::NOlap::TBlobRange& key) const { - return key.Hash(); - } -}; - -template <> -struct THash { - inline size_t operator() (const NKikimr::NOlap::TEvictedBlob& key) const { - return key.Hash(); - } -}; +#include "common/blob.h" diff --git a/ydb/core/tx/columnshard/common/blob.cpp b/ydb/core/tx/columnshard/common/blob.cpp new file mode 100644 index 000000000000..13aafcd818e2 --- /dev/null +++ b/ydb/core/tx/columnshard/common/blob.cpp @@ -0,0 +1,217 @@ +#include "blob.h" +#include + +#include + +namespace NKikimr::NOlap { + +// Format: "S3-f(logoBlobId)-group" +// Example: "S3-42-72075186224038245_51_31595_2_0_11952_0-2181038103" +TString DsIdToS3Key(const TUnifiedBlobId& dsid, const ui64 pathId) { + TString blobId = dsid.GetLogoBlobId().ToString(); + for (auto&& c : blobId) { + switch (c) { + case ':': + c = '_'; + break; + case '[': + case ']': + c = '-'; + } + } + TString result = + "S3-" + + ::ToString(pathId) + + blobId + + ::ToString(dsid.GetDsGroup()) + ; + return result; +} + +TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error, ui64& pathId) { + TVector keyBucket; + Split(s, "-", keyBucket); + + ui32 dsGroup; + if (keyBucket.size() != 4 || keyBucket[0] != "S3" + || !TryFromString(keyBucket[3], dsGroup) + || !TryFromString(keyBucket[1], pathId)) + { + error = TStringBuilder() << "Wrong S3 key '" << s << "'"; + return TUnifiedBlobId(); + } + + TString blobId = "[" + keyBucket[2] + "]"; + for (size_t i = 0; i < blobId.size(); ++i) { + switch (blobId[i]) { + case '_': + blobId[i] = ':'; + break; + } + } + + TLogoBlobID logoBlobId; + if (!TLogoBlobID::Parse(logoBlobId, blobId, error)) { + return TUnifiedBlobId(); + } + + return TUnifiedBlobId(dsGroup, logoBlobId); +} + +namespace { + +#define PARSE_INT_COMPONENT(fieldType, fieldName, endChar) \ + if (pos >= endPos) { \ + error = "Failed to parse " #fieldName " component"; \ + return TUnifiedBlobId(); \ + } \ + fieldType fieldName = -1; \ + { \ + auto [ptr, ec] { std::from_chars(str + pos, str + endPos, fieldName) }; \ + if (ec != std::errc()) { \ + error = "Failed to parse " #fieldName " component"; \ + return TUnifiedBlobId(); \ + } else { \ + pos = ptr - str; \ + } \ + if (str[pos++] != endChar) { \ + error = #endChar " not found after " #fieldName; \ + return TUnifiedBlobId(); \ + } \ + } + +// Format: "DS:group:logoBlobId" +// Example: "DS:2181038103:[72075186224038245:51:31595:2:0:11952:0]" +TUnifiedBlobId ParseExtendedDsBlobId(const TString& s, TString& error) { + Y_ABORT_UNLESS(s.size() > 2); + const char* str = s.c_str(); + Y_ABORT_UNLESS(str[0] == 'D' && str[1] == 'S'); + i64 pos = 2; + i64 endPos = s.size(); + if (str[pos++] != ':') { + error = "Starting ':' not found"; + return TUnifiedBlobId(); + } + + PARSE_INT_COMPONENT(ui32, dsGroup, ':'); + + TLogoBlobID logoBlobId; + if (!TLogoBlobID::Parse(logoBlobId, s.substr(pos), error)) { + return TUnifiedBlobId(); + } + + return TUnifiedBlobId(dsGroup, logoBlobId); +} + +// Format: "SM[tabletId:generation:step:cookie:size]" +// Example: "SM[72075186224038245:51:31184:0:2528]" +TUnifiedBlobId ParseSmallBlobId(const TString& s, TString& error) { + Y_ABORT_UNLESS(s.size() > 2); + const char* str = s.c_str(); + Y_ABORT_UNLESS(str[0] == 'S' && str[1] == 'M'); + i64 pos = 2; + i64 endPos = s.size(); + if (str[pos++] != '[') { + error = "opening [ not found"; + return TUnifiedBlobId(); + } + + PARSE_INT_COMPONENT(ui64, tabletId, ':'); + PARSE_INT_COMPONENT(ui32, gen, ':'); + PARSE_INT_COMPONENT(ui32, step, ':'); + PARSE_INT_COMPONENT(ui32, cookie, ':'); + PARSE_INT_COMPONENT(ui32, size, ']'); + + if (pos != endPos) { + error = "Extra characters after closing ]"; + return TUnifiedBlobId(); + } + + return TUnifiedBlobId(tabletId, gen, step, cookie, size); +} + +// Format: "s = S3_key" +TUnifiedBlobId ParseS3BlobId(const TString& s, TString& error) { + ui64 pathId; + TUnifiedBlobId dsBlobId = S3KeyToDsId(s, error, pathId); + if (!dsBlobId.IsValid()) { + return TUnifiedBlobId(); + } + + return TUnifiedBlobId(dsBlobId, TUnifiedBlobId::S3_BLOB, pathId); +} + +} + +TUnifiedBlobId TUnifiedBlobId::ParseFromString(const TString& str, + const IBlobGroupSelector* dsGroupSelector, TString& error) +{ + if (str.size() <= 2) { + error = TStringBuilder() << "Wrong blob id: '" << str << "'"; + return TUnifiedBlobId(); + } + + if (str[0] == '[') { + // If blobId starts with '[' this must be a logoblobId and if channel is set to FAKE_CHANNEL + // this is a fake logoblobid used for small blob + TLogoBlobID logoBlobId; + bool parsed = TLogoBlobID::Parse(logoBlobId, str, error); + if (!parsed) { + error = "Cannot parse TLogoBlobID: " + error; + return TUnifiedBlobId(); + } + if (logoBlobId.Channel() == TSmallBlobId::FAKE_CHANNEL) { + // Small blob + return TUnifiedBlobId(logoBlobId.TabletID(), logoBlobId.Generation(), logoBlobId.Step(), + logoBlobId.Cookie(), logoBlobId.BlobSize()); + } else { + // DS blob + if (!dsGroupSelector) { + error = "Need TBlobGroupSelector to resolve DS group for the blob"; + return TUnifiedBlobId(); + } + return TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); + } + } else if (str[0] == 'D' && str[1] == 'S') { + return ParseExtendedDsBlobId(str, error); + } else if (str[0] == 'S' && str[1] == 'M') { + return ParseSmallBlobId(str, error); + } else if (str[0] == 'S' && str[1] == '3') { + return ParseS3BlobId(str, error); + } + + error = TStringBuilder() << "Wrong blob id: '" << str << "'"; + return TUnifiedBlobId(); +} + +NKikimr::TConclusionStatus TBlobRange::DeserializeFromProto(const NKikimrColumnShardProto::TBlobRange& proto) { + auto parsed = TUnifiedBlobId::BuildFromString(proto.GetBlobId(), nullptr); + if (!parsed) { + return parsed; + } + BlobId = parsed.DetachResult(); + + Offset = proto.GetOffset(); + Size = proto.GetSize(); + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TBlobRange::BuildFromProto(const NKikimrColumnShardProto::TBlobRange& proto) { + TBlobRange result; + auto parsed = result.DeserializeFromProto(proto); + if (!parsed) { + return parsed; + } else { + return result; + } +} + +NKikimrColumnShardProto::TBlobRange TBlobRange::SerializeToProto() const { + NKikimrColumnShardProto::TBlobRange result; + result.SetBlobId(BlobId.ToStringNew()); + result.SetOffset(Offset); + result.SetSize(Size); + return result; +} + +} diff --git a/ydb/core/tx/columnshard/common/blob.h b/ydb/core/tx/columnshard/common/blob.h new file mode 100644 index 000000000000..b01d1cb5b9d0 --- /dev/null +++ b/ydb/core/tx/columnshard/common/blob.h @@ -0,0 +1,475 @@ +#pragma once + +#include +#include + +#include + +namespace NKikimrColumnShardProto { +class TBlobRange; +} + +namespace NKikimr::NOlap { + +class IBlobGroupSelector { +protected: + virtual ~IBlobGroupSelector() = default; + +public: + virtual ui32 GetGroup(const TLogoBlobID& blobId) const = 0; +}; + +class TUnifiedBlobId; + +TString DsIdToS3Key(const TUnifiedBlobId& dsid, const ui64 pathId); +TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error, ui64& pathId); + +// Encapsulates different types of blob ids to simplify dealing with blobs for the +// components that do not need to know where the blob is stored +// Blob id formats: +// * Old DS blob id: just "logoBlobId" e.g. "[72075186224038245:51:31595:2:0:11952:0]" +// * DS blob id: "DS:dsGroup:logoBlobId" e.g. "DS:2181038103:[72075186224038245:51:31595:2:0:11952:0]" +// * Small blob id: "SM[tabletId:generation:step:cookie:size]" e.g. "SM[72075186224038245:51:31184:0:2528]" +class TUnifiedBlobId { + struct TInvalid { + bool operator == (const TInvalid&) const { return true; } + }; + + // Id of a blob in YDB distributed storage + struct TDsBlobId { + TLogoBlobID BlobId; + ui32 DsGroup; + + bool operator == (const TDsBlobId& other) const { + return BlobId == other.BlobId && DsGroup == other.DsGroup; + } + + TString ToStringNew() const { + return Sprintf( "DS:%" PRIu32 ":%s", DsGroup, BlobId.ToString().c_str()); + } + + TString ToStringLegacy() const { + return BlobId.ToString(); + } + + ui64 Hash() const { + return CombineHashes(BlobId.Hash(), IntHash(DsGroup)); + } + }; + + // Id of a blob that is stored in Tablet local DB table + struct TSmallBlobId { + static constexpr ui8 FAKE_CHANNEL = 255; // Small blob id can be represented as + // a fake TLogoBlobID with channel = FAKE_CHANNEL + + ui64 TabletId; + ui32 Gen; + ui32 Step; + ui32 Cookie; + ui32 Size; + + bool operator == (const TSmallBlobId& other) const { + return TabletId == other.TabletId && + Gen == other.Gen && + Step == other.Step && + Cookie == other.Cookie && + Size == other.Size; + } + + TString ToStringNew() const { + return Sprintf( "SM[%" PRIu64 ":%" PRIu32 ":%" PRIu32 ":%" PRIu32 ":%" PRIu32 "]", + TabletId, Gen, Step, Cookie, Size); + } + + TString ToStringLegacy() const { + // For compatibility with preproduction version small blobs can also be + // addressed by fake TlogoBlobID with channel = 255 + return TLogoBlobID(TabletId, Gen, Step, FAKE_CHANNEL, Size, Cookie).ToString(); + } + + ui64 Hash() const { + ui64 hash = IntHash(TabletId); + hash = CombineHashes(hash, IntHash(Gen)); + hash = CombineHashes(hash, IntHash(Step)); + hash = CombineHashes(hash, IntHash(Cookie)); + hash = CombineHashes(hash, IntHash(Size)); + return hash; + } + }; + + struct TS3BlobId { + TDsBlobId DsBlobId; + TString Key; + + TS3BlobId() = default; + + TS3BlobId(const TUnifiedBlobId& dsBlob, const ui64 pathId) + { + Y_ABORT_UNLESS(dsBlob.IsDsBlob()); + DsBlobId = std::get(dsBlob.Id); + Key = DsIdToS3Key(dsBlob, pathId); + } + + bool operator == (const TS3BlobId& other) const { + return Key == other.Key; + } + + TString ToStringNew() const { + return Sprintf("%s", Key.c_str()); + } + + ui64 Hash() const { + return IntHash(THash()(Key)); + } + }; + + std::variant< + TInvalid, + TDsBlobId, + TSmallBlobId, + TS3BlobId + > Id; + +public: + enum EBlobType { + INVALID = 0, + DS_BLOB = 1, + TABLET_SMALL_BLOB = 2, + S3_BLOB = 3, + }; + + TUnifiedBlobId() + : Id(TInvalid()) + {} + + // Initialize as DS blob Id + TUnifiedBlobId(ui32 dsGroup, const TLogoBlobID& logoBlobId) + : Id(TDsBlobId{logoBlobId, dsGroup}) + {} + + // Initialize as Small blob Id + TUnifiedBlobId(ui64 tabletId, ui32 gen, ui32 step, ui32 cookie, ui32 size) + : Id(TSmallBlobId{tabletId, gen, step, cookie, size}) + {} + + // Make S3 blob Id from DS one + TUnifiedBlobId(const TUnifiedBlobId& blob, EBlobType type, const ui64 pathId) + : Id(TS3BlobId(blob, pathId)) + { + Y_ABORT_UNLESS(type == S3_BLOB); + } + + TUnifiedBlobId(const TUnifiedBlobId& other) = default; + TUnifiedBlobId& operator = (const TUnifiedBlobId& logoBlobId) = default; + TUnifiedBlobId(TUnifiedBlobId&& other) = default; + TUnifiedBlobId& operator = (TUnifiedBlobId&& logoBlobId) = default; + + static TConclusion BuildFromString(const TString& id, const IBlobGroupSelector* dsGroupSelector) { + TString error; + TUnifiedBlobId result = ParseFromString(id, dsGroupSelector, error); + if (!result.IsValid()) { + return TConclusionStatus::Fail(error); + } + return result; + } + + TUnifiedBlobId MakeS3BlobId(ui64 pathId) const { + Y_ABORT_UNLESS(IsDsBlob()); + return TUnifiedBlobId(*this, TUnifiedBlobId::S3_BLOB, pathId); + } + + static TUnifiedBlobId ParseFromString(const TString& str, + const IBlobGroupSelector* dsGroupSelector, TString& error); + + bool operator == (const TUnifiedBlobId& other) const { + return Id == other.Id; + } + + EBlobType GetType() const { + return (EBlobType)Id.index(); + } + + bool IsValid() const { + return Id.index() != INVALID; + } + + size_t BlobSize() const { + switch (Id.index()) { + case DS_BLOB: + return std::get(Id).BlobId.BlobSize(); + case TABLET_SMALL_BLOB: + return std::get(Id).Size; + case S3_BLOB: + return std::get(Id).DsBlobId.BlobId.BlobSize(); + case INVALID: + Y_ABORT("Invalid blob id"); + } + Y_ABORT(); + } + + bool IsSmallBlob() const { + return GetType() == TABLET_SMALL_BLOB; + } + + bool IsDsBlob() const { + return GetType() == DS_BLOB; + } + + bool IsS3Blob() const { + return GetType() == S3_BLOB; + } + + TLogoBlobID GetLogoBlobId() const { + Y_ABORT_UNLESS(IsDsBlob()); + return std::get(Id).BlobId; + } + + ui32 GetDsGroup() const { + Y_ABORT_UNLESS(IsDsBlob()); + return std::get(Id).DsGroup; + } + + TString GetS3Key() const { + Y_ABORT_UNLESS(IsS3Blob()); + return std::get(Id).Key; + } + + ui64 GetTabletId() const { + switch (Id.index()) { + case DS_BLOB: + return std::get(Id).BlobId.TabletID(); + case TABLET_SMALL_BLOB: + return std::get(Id).TabletId; + case S3_BLOB: + return std::get(Id).DsBlobId.BlobId.TabletID(); + case INVALID: + Y_ABORT("Invalid blob id"); + } + Y_ABORT(); + } + + ui64 Hash() const noexcept { + switch (Id.index()) { + case INVALID: + return 0; + case DS_BLOB: + return std::get(Id).Hash(); + case TABLET_SMALL_BLOB: + return std::get(Id).Hash(); + case S3_BLOB: + return std::get(Id).Hash(); + } + Y_ABORT(); + } + + // This is only implemented for DS for backward compatibility with persisted data. + // All new functionality should rahter use string blob id representation + TString SerializeBinary() const { + Y_ABORT_UNLESS(IsDsBlob()); + return TString((const char*)GetLogoBlobId().GetRaw(), sizeof(TLogoBlobID)); + } + + TString ToStringLegacy() const { + switch (Id.index()) { + case DS_BLOB: + return std::get(Id).ToStringLegacy(); + case TABLET_SMALL_BLOB: + return std::get(Id).ToStringLegacy(); + case S3_BLOB: + Y_ABORT("Not implemented"); + case INVALID: + return ""; + } + Y_ABORT(); + } + + TString ToStringNew() const { + switch (Id.index()) { + case DS_BLOB: + return std::get(Id).ToStringNew(); + case TABLET_SMALL_BLOB: + return std::get(Id).ToStringNew(); + case S3_BLOB: + return std::get(Id).ToStringNew(); + case INVALID: + return ""; + } + Y_ABORT(); + } +}; + + +// Describes a range of bytes in a blob. It is used for read requests and for caching. +struct TBlobRange { + TUnifiedBlobId BlobId; + ui32 Offset; + ui32 Size; + + const TUnifiedBlobId& GetBlobId() const { + return BlobId; + } + + bool IsValid() const { + return BlobId.IsValid() && Size && Offset + Size <= BlobId.BlobSize(); + } + + ui32 GetBlobSize() const { + return Size; + } + + bool IsFullBlob() const { + return Size == BlobId.BlobSize(); + } + + explicit TBlobRange(const TUnifiedBlobId& blobId = TUnifiedBlobId(), ui32 offset = 0, ui32 size = 0) + : BlobId(blobId) + , Offset(offset) + , Size(size) + { + if (Size > 0) { + Y_ABORT_UNLESS(Offset < BlobId.BlobSize()); + Y_ABORT_UNLESS(Offset + Size <= BlobId.BlobSize()); + } + } + + static TBlobRange FromBlobId(const TUnifiedBlobId& blobId) { + return TBlobRange(blobId, 0, blobId.BlobSize()); + } + + bool operator == (const TBlobRange& other) const { + return + BlobId == other.BlobId && + Offset == other.Offset && + Size == other.Size; + } + + ui64 Hash() const noexcept { + ui64 hash = BlobId.Hash(); + hash = CombineHashes(hash, IntHash(Offset)); + hash = CombineHashes(hash, IntHash(Size)); + return hash; + } + + TString ToString() const { + return Sprintf("{ Blob: %s Offset: %" PRIu32 " Size: %" PRIu32 " }", + BlobId.ToStringNew().c_str(), Offset, Size); + } + + NKikimrColumnShardProto::TBlobRange SerializeToProto() const; + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardProto::TBlobRange& proto); + + static TConclusion BuildFromProto(const NKikimrColumnShardProto::TBlobRange& proto); +}; + +class IBlobInUseTracker { +private: + virtual bool DoFreeBlob(const NOlap::TUnifiedBlobId& blobId) = 0; + virtual bool DoUseBlob(const NOlap::TUnifiedBlobId& blobId) = 0; +public: + virtual ~IBlobInUseTracker() = default; + + bool FreeBlob(const NOlap::TUnifiedBlobId& blobId) { + return DoFreeBlob(blobId); + } + bool UseBlob(const NOlap::TUnifiedBlobId& blobId) { + return DoUseBlob(blobId); + } + + virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& blobId) const = 0; +}; + +// Expected blob lifecycle: EVICTING -> SELF_CACHED -> EXTERN <-> CACHED +enum class EEvictState : ui8 { + UNKNOWN = 0, + EVICTING = 1, // source, extern, cached blobs: 1-- + SELF_CACHED = 2, // source, extern, cached blobs: 11- + EXTERN = 3, // source, extern, cached blobs: -1- + CACHED = 4, // source, extern, cached blobs: -11 + ERASING = 5, // source, extern, cached blobs: -?? + //ERASED = 6, // source, extern, cached blobs: --- +}; + +inline bool IsExported(EEvictState state) { + return state == EEvictState::SELF_CACHED || + state == EEvictState::EXTERN || + state == EEvictState::CACHED; +} + +inline bool CouldBeExported(EEvictState state) { + return state == EEvictState::SELF_CACHED || + state == EEvictState::EXTERN || + state == EEvictState::CACHED || + state == EEvictState::ERASING; +} + +inline bool IsDeleted(EEvictState state) { + return ui8(state) >= ui8(EEvictState::EXTERN); // !EVICTING and !SELF_CACHED +} + +struct TEvictedBlob { + EEvictState State = EEvictState::UNKNOWN; + TUnifiedBlobId Blob; + TUnifiedBlobId ExternBlob; + TUnifiedBlobId CachedBlob; + + bool operator == (const TEvictedBlob& other) const { + return Blob == other.Blob; + } + + ui64 Hash() const noexcept { + return Blob.Hash(); + } + + bool IsEvicting() const { + return State == EEvictState::EVICTING; + } + + bool IsExternal() const { + if (State == EEvictState::EXTERN) { + Y_ABORT_UNLESS(ExternBlob.IsValid()); + return true; + } + return false; + } + + TString ToString() const { + return TStringBuilder() << "state: " << (ui32)State + << " blob: " << Blob.ToStringNew() + << " extern: " << ExternBlob.ToStringNew() + << " cached: " << CachedBlob.ToStringNew(); + } +}; + +} + +inline +IOutputStream& operator <<(IOutputStream& out, const NKikimr::NOlap::TUnifiedBlobId& blobId) { + return out << blobId.ToStringNew(); +} + +inline +IOutputStream& operator <<(IOutputStream& out, const NKikimr::NOlap::TBlobRange& blobRange) { + return out << blobRange.ToString(); +} + +template<> +struct ::THash { + inline ui64 operator()(const NKikimr::NOlap::TUnifiedBlobId& a) const { + return a.Hash(); + } +}; + +template <> +struct THash { + inline size_t operator() (const NKikimr::NOlap::TBlobRange& key) const { + return key.Hash(); + } +}; + +template <> +struct THash { + inline size_t operator() (const NKikimr::NOlap::TEvictedBlob& key) const { + return key.Hash(); + } +}; diff --git a/ydb/core/tx/columnshard/common/ya.make b/ydb/core/tx/columnshard/common/ya.make index aaec1a2c2d69..993b47fff695 100644 --- a/ydb/core/tx/columnshard/common/ya.make +++ b/ydb/core/tx/columnshard/common/ya.make @@ -7,6 +7,7 @@ SRCS( snapshot.cpp portion.cpp tablet_id.cpp + blob.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/defs.h b/ydb/core/tx/columnshard/defs.h index f3d5c90bc181..76d1cc3a2bb0 100644 --- a/ydb/core/tx/columnshard/defs.h +++ b/ydb/core/tx/columnshard/defs.h @@ -1,4 +1,5 @@ #pragma once +#include "common/blob.h" #include #include #include diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.h b/ydb/core/tx/columnshard/engines/db_wrapper.h index d29b9e300a2e..6e23ee5a67a0 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.h +++ b/ydb/core/tx/columnshard/engines/db_wrapper.h @@ -1,5 +1,6 @@ #pragma once #include "defs.h" +#include namespace NKikimr::NTable { class TDatabase; diff --git a/ydb/core/tx/columnshard/engines/defs.h b/ydb/core/tx/columnshard/engines/defs.h index cb0ca8ee6ae7..a01edc7ef767 100644 --- a/ydb/core/tx/columnshard/engines/defs.h +++ b/ydb/core/tx/columnshard/engines/defs.h @@ -15,14 +15,6 @@ inline TWriteId operator++(TWriteId& w) noexcept { return w; } -class IBlobGroupSelector { -protected: - virtual ~IBlobGroupSelector() = default; - -public: - virtual ui32 GetGroup(const TLogoBlobID& blobId) const = 0; -}; - } // namespace NKikimr::NOlap template <> diff --git a/ydb/core/tx/columnshard/splitter/ut/ya.make b/ydb/core/tx/columnshard/splitter/ut/ya.make index 4d9e9af04665..6a5c2bb40f40 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ya.make +++ b/ydb/core/tx/columnshard/splitter/ut/ya.make @@ -8,6 +8,7 @@ PEERDIR( ydb/core/tx/columnshard/counters ydb/core/tx/columnshard/engines/portions + ydb/core/tx/columnshard/common ydb/core/kqp/common ydb/library/yql/parser/pg_wrapper ydb/library/yql/public/udf