-
Notifications
You must be signed in to change notification settings - Fork 610
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
511 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23 changes: 23 additions & 0 deletions
23
ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#include "checker.h" | ||
#include <ydb/core/formats/arrow/serializer/abstract.h> | ||
#include <ydb/core/formats/arrow/common/validation.h> | ||
#include <contrib/libs/apache/arrow/cpp/src/arrow/array/array_primitive.h> | ||
#include <contrib/libs/apache/arrow/cpp/src/arrow/record_batch.h> | ||
|
||
namespace NKikimr::NOlap::NIndexes::NCountMinSketch { | ||
|
||
void TCountMinSketchChecker::DoSerializeToProtoImpl(NKikimrSSA::TProgram::TOlapIndexChecker& proto) const { | ||
Y_ABORT("Unimplemented"); // unimplemented, should not be used | ||
} | ||
|
||
/// Check hook of the checker interface.
/// Intentionally not implemented for this checker: any call ends in Y_ABORT.
/// The parameter name is commented out because the body never reads it.
bool TCountMinSketchChecker::DoCheckImpl(const std::vector<TString>& /*blobs*/) const {
    Y_ABORT("Unimplemented"); // unimplemented, should not be used
    // Kept so that every path of a bool-returning function has a return statement.
    return false;
}
|
||
/// Deserialization hook of the checker interface.
/// Intentionally not implemented for this checker: any call ends in Y_ABORT.
/// The parameter name is commented out because the body never reads it.
bool TCountMinSketchChecker::DoDeserializeFromProtoImpl(const NKikimrSSA::TProgram::TOlapIndexChecker& /*proto*/) {
    Y_ABORT("Unimplemented"); // unimplemented, should not be used
    // Kept so that every path of a bool-returning function has a return statement.
    return false;
}
|
||
} // namespace NKikimr::NOlap::NIndexes::NCountMinSketch |
32 changes: 32 additions & 0 deletions
32
ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#pragma once | ||
#include <ydb/core/tx/columnshard/engines/scheme/indexes/abstract/simple.h> | ||
|
||
namespace NKikimr::NOlap::NIndexes::NCountMinSketch { | ||
|
||
class TCountMinSketchChecker: public TSimpleIndexChecker { | ||
public: | ||
static TString GetClassNameStatic() { | ||
return "COUNT_MIN_SKETCH"; | ||
} | ||
private: | ||
using TBase = TSimpleIndexChecker; | ||
static inline auto Registrator = TFactory::TRegistrator<TCountMinSketchChecker>(GetClassNameStatic()); | ||
|
||
protected: | ||
virtual bool DoDeserializeFromProtoImpl(const NKikimrSSA::TProgram::TOlapIndexChecker& proto) override; | ||
virtual void DoSerializeToProtoImpl(NKikimrSSA::TProgram::TOlapIndexChecker& proto) const override; | ||
|
||
virtual bool DoCheckImpl(const std::vector<TString>& blobs) const override; | ||
|
||
public: | ||
TCountMinSketchChecker() = default; | ||
TCountMinSketchChecker(const ui32 indexId) | ||
: TBase(indexId) | ||
{} | ||
|
||
virtual TString GetClassName() const override { | ||
return GetClassNameStatic(); | ||
} | ||
}; | ||
|
||
} // namespace NKikimr::NOlap::NIndexes::NCountMinSketch |
58 changes: 58 additions & 0 deletions
58
ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/constructor.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#include "constructor.h" | ||
#include "meta.h" | ||
|
||
#include <ydb/core/tx/schemeshard/olap/schema/schema.h> | ||
|
||
namespace NKikimr::NOlap::NIndexes::NCountMinSketch { | ||
|
||
/// Resolves every configured column name against currentSchema and builds the index meta.
/// If a name is not present in the schema, the problem is reported through
/// errors and nullptr is returned.
std::shared_ptr<NKikimr::NOlap::NIndexes::IIndexMeta> TCountMinSketchConstructor::DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const {
    std::set<ui32> columnIds;
    for (const auto& columnName : ColumnNames) {
        auto* columnInfo = currentSchema.GetColumns().GetByName(columnName);
        if (columnInfo == nullptr) {
            errors.AddError("no column with name " + columnName);
            return nullptr;
        }
        // Each resolved id must be new to the set; a duplicate id fails the verification.
        AFL_VERIFY(columnIds.emplace(columnInfo->GetId()).second);
    }
    // NOTE(review): the TIndexMeta constructor declared in meta.h takes
    // (indexId, indexName, storageId, columnIds), while this call passes no
    // storageId — confirm which overload is intended.
    return std::make_shared<TIndexMeta>(indexId, indexName, columnIds);
}
|
||
/// Reads the "column_names" array of strings from jsonInfo into ColumnNames.
/// Fails when the key is missing, is not an array, or contains a non-string
/// element. Names seen before a non-string element have already been added
/// to ColumnNames when the failure is returned.
NKikimr::TConclusionStatus TCountMinSketchConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) {
    if (!jsonInfo.Has("column_names")) {
        return TConclusionStatus::Fail("column_names have to be in count min sketch features");
    }

    const NJson::TJsonValue::TArray* names = nullptr;
    if (!jsonInfo["column_names"].GetArrayPointer(&names)) {
        return TConclusionStatus::Fail("column_names have to be in count min sketch features as array ['column_name_1', ... , 'column_name_N']");
    }

    for (const auto& entry : *names) {
        if (entry.IsString()) {
            ColumnNames.emplace(entry.GetString());
            continue;
        }
        return TConclusionStatus::Fail("column_names have to be in count min sketch features as array of strings ['column_name_1', ... , 'column_name_N']");
    }
    return TConclusionStatus::Success();
}
|
||
/// Copies the column names of the CountMinSketch section of proto into ColumnNames.
/// When the section is absent, the problem is logged to TX_COLUMNSHARD and a
/// failure carrying the same message is returned.
NKikimr::TConclusionStatus TCountMinSketchConstructor::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) {
    if (proto.HasCountMinSketch()) {
        for (const auto& columnName : proto.GetCountMinSketch().GetColumnNames()) {
            ColumnNames.emplace(columnName);
        }
        return TConclusionStatus::Success();
    }

    const TString errorMessage = "not found CountMinSketch section in proto: \"" + proto.DebugString() + "\"";
    AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", errorMessage);
    return TConclusionStatus::Fail(errorMessage);
}
|
||
/// Writes every entry of ColumnNames into the CountMinSketch section of proto.
/// The section is requested via MutableCountMinSketch() even when ColumnNames is empty.
void TCountMinSketchConstructor::DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const {
    auto& sketchProto = *proto.MutableCountMinSketch();
    for (const auto& columnName : ColumnNames) {
        sketchProto.AddColumnNames(columnName);
    }
}
|
||
} // namespace NKikimr::NOlap::NIndexes::NCountMinSketch |
31 changes: 31 additions & 0 deletions
31
ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/constructor.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#pragma once | ||
#include <ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h> | ||
|
||
namespace NKikimr::NOlap::NIndexes::NCountMinSketch { | ||
|
||
class TCountMinSketchConstructor: public IIndexMetaConstructor { | ||
public: | ||
static TString GetClassNameStatic() { | ||
return "COUNT_MIN_SKETCH"; | ||
} | ||
private: | ||
std::set<TString> ColumnNames; | ||
static inline auto Registrator = TFactory::TRegistrator<TCountMinSketchConstructor>(GetClassNameStatic()); | ||
|
||
protected: | ||
virtual std::shared_ptr<IIndexMeta> DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const override; | ||
|
||
virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) override; | ||
|
||
virtual TConclusionStatus DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) override; | ||
virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const override; | ||
|
||
public: | ||
TCountMinSketchConstructor() = default; | ||
|
||
virtual TString GetClassName() const override { | ||
return GetClassNameStatic(); | ||
} | ||
}; | ||
|
||
} // namespace NKikimr::NOlap::NIndexes::NCountMinSketch |
57 changes: 57 additions & 0 deletions
57
ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#include "meta.h" | ||
#include "checker.h" | ||
#include <ydb/core/formats/arrow/hash/xx_hash.h> | ||
#include <ydb/core/formats/arrow/hash/calcer.h> | ||
#include <ydb/core/tx/program/program.h> | ||
#include <ydb/core/tx/schemeshard/olap/schema/schema.h> | ||
#include <ydb/library/minsketch/stack_count_min_sketch.h> | ||
|
||
#include <contrib/libs/apache/arrow/cpp/src/arrow/array/builder_primitive.h> | ||
#include <library/cpp/deprecated/atomic/atomic.h> | ||
|
||
namespace NKikimr::NOlap::NIndexes::NCountMinSketch { | ||
|
||
/// Builds the serialized index: one count-min sketch per indexed column.
///
/// Walks the reader record by record; for every column reader the current
/// value is fed into the sketch at the same position. The result is the raw
/// bytes of the sketch array, sized as sketch count times TSketch::GetSize().
TString TIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const {
    using TSketch = TStackAllocatedCountMinSketch<256, 8>;

    std::vector<TSketch> sketchesByColumns(ColumnIds.size());

    // The reader must expose exactly one column reader per indexed column.
    AFL_VERIFY(std::distance(reader.begin(), reader.end()) == static_cast<long>(sketchesByColumns.size()));

    for (reader.Start(); reader.IsCorrect(); reader.ReadNext()) {
        size_t sketchIndex = 0;
        for (auto&& colReader : reader) {
            auto array = colReader.GetCurrentChunk();
            auto& sketch = sketchesByColumns[sketchIndex];
            // Wide signed index: avoids narrowing the reader's position to int.
            const i64 recordIndex = colReader.GetCurrentRecordIndex();

            NArrow::SwitchType(array->type_id(), [&](const auto& type) {
                using TWrap = std::decay_t<decltype(type)>;
                using TArray = typename arrow::TypeTraits<typename TWrap::T>::ArrayType;

                const TArray& arrTyped = static_cast<const TArray&>(*array);
                if constexpr (arrow::has_c_type<typename TWrap::T>()) {
                    // Fixed-width value: hash its in-memory cell representation.
                    auto cell = TCell::Make(arrTyped.Value(recordIndex));
                    sketch.Count(cell.Data(), cell.Size());
                    return true;
                }
                if constexpr (arrow::has_string_view<typename TWrap::T>()) {
                    // String-like value: hash the viewed bytes.
                    auto view = arrTyped.GetView(recordIndex);
                    sketch.Count(view.data(), view.size());
                    return true;
                }
                AFL_VERIFY(false);
                // Gives every instantiation of this generic lambda the same
                // deduced return type (bool), including the one where neither
                // branch above is compiled in.
                return false;
            });
            ++sketchIndex;
        }
    }

    // NOTE(review): this treats the vector storage as size() * GetSize()
    // contiguous bytes, i.e. assumes GetSize() equals sizeof(TSketch) — confirm.
    TString result(reinterpret_cast<const char*>(sketchesByColumns.data()), sketchesByColumns.size() * TSketch::GetSize());
    return result;
}
|
||
/// Appends a TCountMinSketchChecker for this index to every branch of info.
/// The schema argument is not consulted, so its name is commented out.
void TIndexMeta::DoFillIndexCheckers(const std::shared_ptr<NRequest::TDataForIndexesCheckers>& info, const NSchemeShard::TOlapSchema& /*schema*/) const {
    for (auto&& branch : info->GetBranches()) {
        branch->MutableIndexes().emplace_back(std::make_shared<TCountMinSketchChecker>(GetIndexId()));
    }
}
|
||
} // namespace NKikimr::NOlap::NIndexes::NCountMinSketch |
58 changes: 58 additions & 0 deletions
58
ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#pragma once | ||
#include <ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h> | ||
|
||
namespace NKikimr::NOlap::NIndexes::NCountMinSketch { | ||
|
||
class TIndexMeta: public TIndexByColumns { | ||
public: | ||
static TString GetClassNameStatic() { | ||
return "COUNT_MIN_SKETCH"; | ||
} | ||
|
||
private: | ||
using TBase = TIndexByColumns; | ||
|
||
static inline auto Registrator = TFactory::TRegistrator<TIndexMeta>(GetClassNameStatic()); | ||
|
||
protected: | ||
virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const override { | ||
const auto* bMeta = dynamic_cast<const TIndexMeta*>(&newMeta); | ||
if (!bMeta) { | ||
return TConclusionStatus::Fail("cannot read meta as appropriate class: " + GetClassName() + ". Meta said that class name is " + newMeta.GetClassName()); | ||
} | ||
return TBase::CheckSameColumnsForModification(newMeta); | ||
} | ||
|
||
virtual void DoFillIndexCheckers(const std::shared_ptr<NRequest::TDataForIndexesCheckers>& info, const NSchemeShard::TOlapSchema& schema) const override; | ||
|
||
virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const override; | ||
|
||
virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override { | ||
AFL_VERIFY(TBase::DoDeserializeFromProto(proto)); | ||
AFL_VERIFY(proto.HasCountMinSketch()); | ||
auto& sketch = proto.GetCountMinSketch(); | ||
for (auto&& i : sketch.GetColumnIds()) { | ||
ColumnIds.emplace(i); | ||
} | ||
return true; | ||
} | ||
|
||
virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const override { | ||
auto* sketchProto = proto.MutableCountMinSketch(); | ||
for (auto&& i : ColumnIds) { | ||
sketchProto->AddColumnIds(i); | ||
} | ||
} | ||
|
||
public: | ||
TIndexMeta() = default; | ||
TIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId, std::set<ui32>& columnIds) | ||
: TBase(indexId, indexName, columnIds, storageId) { | ||
} | ||
|
||
virtual TString GetClassName() const override { | ||
return GetClassNameStatic(); | ||
} | ||
}; | ||
|
||
} // namespace NKikimr::NOlap::NIndexes::NCountMinSketch |
15 changes: 15 additions & 0 deletions
15
ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/ya.make
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
LIBRARY() | ||
|
||
SRCS( | ||
GLOBAL constructor.cpp | ||
GLOBAL meta.cpp | ||
GLOBAL checker.cpp | ||
) | ||
|
||
PEERDIR( | ||
ydb/core/protos | ||
ydb/core/formats/arrow | ||
ydb/core/tx/columnshard/engines/storage/indexes/portions | ||
) | ||
|
||
END() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.