Skip to content

Commit

Permalink
Cached overridden statistics for CBO (#6791)
Browse files Browse the repository at this point in the history
  • Loading branch information
pavelvelikhov authored Jul 18, 2024
1 parent 302e204 commit fd163cc
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 8 deletions.
11 changes: 11 additions & 0 deletions ydb/core/kqp/opt/kqp_opt.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> {
const TIntrusivePtr<NYql::TKikimrTablesData> Tables;
int JoinsCount{};
int EquiJoinsCount{};
std::shared_ptr<NJson::TJsonValue> OverrideStatistics{};

std::shared_ptr<NJson::TJsonValue> GetOverrideStatistics() const {
if (Config->OverrideStatistics.Get()) {
auto jsonValue = new NJson::TJsonValue();
NJson::ReadJsonTree(*Config->OverrideStatistics.Get(), jsonValue, true);
return std::shared_ptr<NJson::TJsonValue>(jsonValue);
} else {
return std::shared_ptr<NJson::TJsonValue>();
}
}

bool IsDataQuery() const {
return QueryCtx->Type == NYql::EKikimrQueryType::Dml;
Expand Down
6 changes: 3 additions & 3 deletions ydb/core/kqp/opt/kqp_statistics_transformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon
auto keyColumns = TIntrusivePtr<TOptimizerStatistics::TKeyColumns>(new TOptimizerStatistics::TKeyColumns(tableData.Metadata->KeyColumnNames));
auto stats = std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, keyColumns);
if (kqpCtx.Config->OverrideStatistics.Get()) {
stats = OverrideStatistics(*stats, path.Value(), *kqpCtx.Config->OverrideStatistics.Get());
stats = OverrideStatistics(*stats, path.Value(), kqpCtx.GetOverrideStatistics());
}
if (stats->ColumnStatistics) {
for (const auto& [columnName, metaData]: tableData.Metadata->Columns) {
Expand Down Expand Up @@ -308,11 +308,11 @@ void InferStatisticsForDqSourceWrap(const TExprNode::TPtr& input, TTypeAnnotatio
auto path = s3DataSource.Name().Cast().StringValue();
if (kqpCtx.Config->OverrideStatistics.Get() && path) {
auto stats = std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, 0.0, 0, 0, 0.0, TIntrusivePtr<TOptimizerStatistics::TKeyColumns>());
stats = OverrideStatistics(*stats, path, *kqpCtx.Config->OverrideStatistics.Get());
stats = OverrideStatistics(*stats, path, kqpCtx.GetOverrideStatistics());
if (stats->ByteSize == 0.0) {
auto n = path.find_last_of('/');
if (n != path.npos) {
stats = OverrideStatistics(*stats, path.substr(n + 1), *kqpCtx.Config->OverrideStatistics.Get());
stats = OverrideStatistics(*stats, path.substr(n + 1), kqpCtx.GetOverrideStatistics());
}
}
if (stats->ByteSize != 0.0) {
Expand Down
6 changes: 2 additions & 4 deletions ydb/library/yql/core/yql_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,10 @@ TOptimizerStatistics& TOptimizerStatistics::operator+=(const TOptimizerStatistic
return *this;
}

std::shared_ptr<TOptimizerStatistics> NYql::OverrideStatistics(const NYql::TOptimizerStatistics& s, const TStringBuf& tablePath, const TString& statHints) {
std::shared_ptr<TOptimizerStatistics> NYql::OverrideStatistics(const NYql::TOptimizerStatistics& s, const TStringBuf& tablePath, const std::shared_ptr<NJson::TJsonValue>& stats) {
auto res = std::make_shared<TOptimizerStatistics>(s.Type, s.Nrows, s.Ncols, s.ByteSize, s.Cost, s.KeyColumns, s.ColumnStatistics);

NJson::TJsonValue root;
NJson::ReadJsonTree(statHints, &root, true);
auto dbStats = root.GetMapSafe();
auto dbStats = stats->GetMapSafe();

if (!dbStats.contains(tablePath)){
return res;
Expand Down
4 changes: 3 additions & 1 deletion ydb/library/yql/core/yql_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include <ydb/library/minsketch/count_min_sketch.h>

#include <library/cpp/json/json_reader.h>

#include <util/generic/vector.h>
#include <util/generic/hash.h>

Expand Down Expand Up @@ -80,6 +82,6 @@ struct TOptimizerStatistics {
friend std::ostream& operator<<(std::ostream& os, const TOptimizerStatistics& s);
};

std::shared_ptr<TOptimizerStatistics> OverrideStatistics(const TOptimizerStatistics& s, const TStringBuf& tablePath, const TString& statHints);
std::shared_ptr<TOptimizerStatistics> OverrideStatistics(const TOptimizerStatistics& s, const TStringBuf& tablePath, const std::shared_ptr<NJson::TJsonValue>& stats);

}

0 comments on commit fd163cc

Please sign in to comment.