Skip to content

Commit

Permalink
[Fix](Variant) implement resize and set num_rows right when `subcolum…
Browse files Browse the repository at this point in the history
…ns` is empty (#38364)

Set rows correct when subcolumns is empty, and implement resize method
with correct semantic
This fix the potential crash with stack bellow
```
6# doris::vectorized::ColumnObject::get(unsigned long, doris::vectorized::Field&) const at /root/doris/be/src/vec/columns/column_object.cpp:841
7# doris::vectorized::ColumnObject::operator[](unsigned long) const at /root/doris/be/src/vec/columns/column_object.cpp:838
8# doris::vectorized::ColumnObject::insert_from(doris::vectorized::IColumn const&, unsigned long) in /home/work/unlimit_teamcity/TeamCity/Agents/20240725170551agent_172.16.0.48_1/work/60183217f6ee2a9c/output/be/lib/doris_be
9# doris::vectorized::ColumnObject::insert_indices_from(doris::vectorized::IColumn const&, unsigned int const*, unsigned int const*) at /root/doris/be/src/vec/columns/column_object.cpp:1581
```
  • Loading branch information
eldenmoon authored Jul 26, 2024
1 parent 9a13422 commit d176998
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 3 deletions.
61 changes: 59 additions & 2 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -509,9 +509,24 @@ MutableColumnPtr ColumnObject::apply_for_subcolumns(Func&& func) const {
res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable(),
subcolumn->data.get_least_common_type());
}
check_consistency();
return res;
}

void ColumnObject::resize(size_t n) {
if (n == num_rows) {
return;
}
if (n > num_rows) {
insert_many_defaults(n - num_rows);
} else {
for (auto& subcolumn : subcolumns) {
subcolumn->data.pop_back(num_rows - n);
}
}
num_rows = n;
}

bool ColumnObject::Subcolumn::check_if_sparse_column(size_t num_rows) {
if (num_rows < config::variant_threshold_rows_to_estimate_sparse_column) {
return false;
Expand Down Expand Up @@ -697,8 +712,16 @@ MutableColumnPtr ColumnObject::clone_resized(size_t new_size) const {
if (new_size == 0) {
return ColumnObject::create(is_nullable);
}
return apply_for_subcolumns(
// If subcolumns are empty, then res will be empty but new_size > 0
if (subcolumns.empty()) {
// Add an emtpy column with new_size rows
auto res = ColumnObject::create(true, false);
res->set_num_rows(new_size);
return res;
}
auto res = apply_for_subcolumns(
[&](const auto& subcolumn) { return subcolumn.clone_resized(new_size); });
return res;
}

size_t ColumnObject::byte_size() const {
Expand Down Expand Up @@ -838,7 +861,10 @@ Field ColumnObject::operator[](size_t n) const {
}

void ColumnObject::get(size_t n, Field& res) const {
assert(n < size());
if (UNLIKELY(n >= size())) {
throw doris::Exception(ErrorCode::OUT_OF_BOUND,
"Index ({}) for getting field is out of range", n);
}
res = VariantMap();
auto& object = res.get<VariantMap&>();

Expand Down Expand Up @@ -886,11 +912,32 @@ void ColumnObject::insert_range_from(const IColumn& src, size_t start, size_t le
}

ColumnPtr ColumnObject::replicate(const Offsets& offsets) const {
if (subcolumns.empty()) {
// Add an emtpy column with offsets.back rows
auto res = ColumnObject::create(true, false);
res->set_num_rows(offsets.back());
}
return apply_for_subcolumns(
[&](const auto& subcolumn) { return subcolumn.replicate(offsets); });
}

ColumnPtr ColumnObject::permute(const Permutation& perm, size_t limit) const {
if (subcolumns.empty()) {
if (limit == 0) {
limit = num_rows;
} else {
limit = std::min(num_rows, limit);
}

if (perm.size() < limit) {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"Size of permutation is less than required.");
}
// Add an emtpy column with limit rows
auto res = ColumnObject::create(true, false);
res->set_num_rows(limit);
return res;
}
return apply_for_subcolumns(
[&](const auto& subcolumn) { return subcolumn.permute(perm, limit); });
}
Expand Down Expand Up @@ -1420,6 +1467,12 @@ ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const {
return finalized_object.apply_for_subcolumns(
[&](const auto& subcolumn) { return subcolumn.filter(filter, count); });
}
if (subcolumns.empty()) {
// Add an emtpy column with filtered rows
auto res = ColumnObject::create(true, false);
res->set_num_rows(count_bytes_in_filter(filter));
return res;
}
auto new_column = ColumnObject::create(true, false);
for (auto& entry : subcolumns) {
auto subcolumn = entry->data.get_finalized_column().filter(filter, count);
Expand All @@ -1433,6 +1486,10 @@ Status ColumnObject::filter_by_selector(const uint16_t* sel, size_t sel_size, IC
if (!is_finalized()) {
finalize();
}
if (subcolumns.empty()) {
assert_cast<ColumnObject*>(col_ptr)->insert_many_defaults(sel_size);
return Status::OK();
}
auto* res = assert_cast<ColumnObject*>(col_ptr);
for (const auto& subcolumn : subcolumns) {
auto new_subcolumn = subcolumn->data.get_least_common_type()->create_column();
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/columns/column_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,8 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {

void clear() override;

void resize(size_t n) override;

void clear_subcolumns_data();

std::string get_name() const override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ suite("variant_mv") {
where g2.actor['id'] > 34259289;
"""
def query3_6 = """
SELECT
SELECT /*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=false,enable_distinct_streaming_aggregation=true,parallel_fragment_exec_instance_num=3,parallel_pipeline_task_num=0,profile_level=1,enable_pipeline_engine=true,enable_parallel_scan=true,parallel_scan_max_scanners_count=32,parallel_scan_min_rows_per_scanner=64,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=1,enable_parallel_result_sink=false,enable_nereids_planner=true,rewrite_or_to_in_predicate_threshold=100000,enable_function_pushdown=false,enable_common_expr_pushdown=false,enable_local_exchange=true,partitioned_hash_join_rows_threshold=8,partitioned_hash_agg_rows_threshold=8,partition_pruning_expand_threshold=10,enable_share_hash_table_for_broadcast_join=true,enable_two_phase_read_opt=true,enable_common_expr_pushdown_for_inverted_index=false,enable_delete_sub_predicate_v2=false,min_revocable_mem=33554432,fetch_remote_schema_timeout_seconds=120,max_fetch_remote_schema_tablet_count=512,enable_join_spill=false,enable_sort_spill=false,enable_agg_spill=false,enable_force_spill=false,data_queue_max_blocks=1,spill_streaming_agg_mem_limit=268435456,external_agg_partition_bits=5) */
g1.id,
g2.type,
floor(cast(g1.actor['id'] as int) + 100.5),
Expand Down

0 comments on commit d176998

Please sign in to comment.