Skip to content

Commit

Permalink
[ut](merger) Add test case for sort merger
Browse files Browse the repository at this point in the history
  • Loading branch information
Gabriel39 committed Feb 18, 2025
1 parent 9bce79b commit 2f660c2
Show file tree
Hide file tree
Showing 3 changed files with 282 additions and 8 deletions.
2 changes: 1 addition & 1 deletion be/src/vec/core/sort_cursor.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ struct BlockSupplierSortCursorImpl : public MergeSortCursorImpl {
}
block->clear();
THROW_IF_ERROR(_block_supplier(block.get(), &_is_eof));
DCHECK(!block->empty() || _is_eof);
DCHECK(!block->empty() xor _is_eof);
if (!block->empty()) {
DCHECK_EQ(_ordering_expr.size(), desc.size());
for (int i = 0; i < desc.size(); ++i) {
Expand Down
36 changes: 29 additions & 7 deletions be/test/testutil/column_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,40 @@
#include <type_traits>
#include <vector>

#include "vec/columns/column_nullable.h"
#include "vec/core/block.h"
#include "vec/data_types/data_type_string.h"

namespace doris::vectorized {
struct ColumnHelper {
public:
template <typename DataType>
static ColumnPtr create_column(const std::vector<typename DataType::FieldType>& datas) {
static ColumnPtr create_column(const std::vector<typename DataType::FieldType>& data) {
auto column = DataType::ColumnType::create();
if constexpr (std::is_same_v<DataTypeString, DataType>) {
for (const auto& data : datas) {
column->insert_data(data.data(), data.size());
for (const auto& datum : data) {
column->insert_data(datum.data(), datum.size());
}
} else {
for (const auto& data : datas) {
column->insert_value(data);
for (const auto& datum : data) {
column->insert_value(datum);
}
}
return std::move(column);
}

// Builds a nullable column for tests from two parallel vectors: `data` is
// turned into the nested column via create_column<DataType>, and every entry
// of `null_map` is appended to a fresh UInt8 column that becomes the null map.
// NOTE(review): nothing here checks that `data` and `null_map` have the same
// length; callers are presumably expected to keep them in sync.
template <typename DataType>
static ColumnPtr create_nullable_column(
        const std::vector<typename DataType::FieldType>& data,
        const std::vector<typename NullMap::value_type>& null_map) {
    auto nested_column = create_column<DataType>(data);

    auto null_flags = ColumnUInt8::create();
    for (const auto& flag : null_map) {
        null_flags->insert_value(flag);
    }

    return ColumnNullable::create(std::move(nested_column), std::move(null_flags));
}

static bool column_equal(const ColumnPtr& column1, const ColumnPtr& column2) {
if (column1->size() != column2->size()) {
return false;
Expand All @@ -67,13 +80,22 @@ struct ColumnHelper {
}

template <typename DataType>
static Block create_block(const std::vector<typename DataType::FieldType>& datas) {
auto column = create_column<DataType>(datas);
static Block create_block(const std::vector<typename DataType::FieldType>& data) {
auto column = create_column<DataType>(data);
auto data_type = std::make_shared<DataType>();
Block block({ColumnWithTypeAndName(column, data_type, "column")});
return block;
}

// Wraps create_nullable_column<DataType>(data, null_map) in a Block that has a
// single column named "column", typed as DataTypeNullable over DataType.
template <typename DataType>
static Block create_nullable_block(const std::vector<typename DataType::FieldType>& data,
                                   const std::vector<typename NullMap::value_type>& null_map) {
    auto nullable_type = std::make_shared<DataTypeNullable>(std::make_shared<DataType>());
    auto nullable_column = create_nullable_column<DataType>(data, null_map);
    Block result({ColumnWithTypeAndName(nullable_column, nullable_type, "column")});
    return result;
}

template <typename DataType>
static ColumnWithTypeAndName create_column_with_name(
const std::vector<typename DataType::FieldType>& datas) {
Expand Down
252 changes: 252 additions & 0 deletions be/test/vec/runtime/sort_merger_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include "testutil/column_helper.h"
#include "testutil/mock/mock_slot_ref.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/runtime/vsorted_run_merger.h"

namespace doris::vectorized {

// Fixture type for the sort-merger suite. It carries no state and both hooks
// are deliberately empty; construction and destruction are left to the
// compiler-generated defaults. The cases in this file are declared with
// TEST(SortMergerTest, ...) rather than TEST_F.
class SortMergerTest : public testing::Test {
public:
    void SetUp() override {}
    void TearDown() override {}
};

TEST(SortMergerTest, NULL_FIRST_ASC) {
    /**
     * Merge five identical runs, ascending order with NULLs first.
     *
     * in: [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4]
     * out: [NULL, NULL, NULL, NULL, NULL], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4], [4], [4], [4], [4]
     */
    const int num_children = 5;
    const int batch_size = 5;
    const int num_round = 2;
    // Per-child call counter, bumped by the supplier on every invocation.
    // Declared first so it outlives the merger that stores the suppliers.
    std::vector<int> round(num_children, 0);

    auto profile = std::make_shared<RuntimeProfile>("");
    auto ordering_expr = MockSlotRef::create_mock_contexts(
            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>()));

    // The merger is handed a raw pointer to `profile`, so it is declared after
    // it and therefore destroyed before it.
    std::unique_ptr<VSortedRunMerger> merger;
    {
        std::vector<bool> is_asc_order = {true};
        std::vector<bool> nulls_first = {true};
        const int limit = -1;
        const int offset = 0;
        merger = std::make_unique<VSortedRunMerger>(ordering_expr, is_asc_order, nulls_first,
                                                    batch_size, limit, offset, profile.get());
    }
    {
        std::vector<BlockSupplier> child_block_suppliers;
        child_block_suppliers.reserve(num_children);
        for (int child_idx = 0; child_idx < num_children; child_idx++) {
            // Capture only what the supplier needs; no catch-all by-reference
            // capture, because the callable is kept by the merger.
            BlockSupplier block_supplier = [round_vec = &round, num_round = num_round,
                                            id = child_idx](Block* block, bool* eos) {
                const int current_round = ++(*round_vec)[id];
                // The first call yields one block; the call on which the
                // counter reaches num_round reports end-of-stream instead.
                *eos = (current_round == num_round);
                if (*eos) {
                    return Status::OK();
                }
                *block = ColumnHelper::create_nullable_block<DataTypeInt64>(
                        {0, current_round + 0, current_round + 1, current_round + 2,
                         current_round + 3},
                        {1, 0, 0, 0, 0});
                return Status::OK();
            };
            child_block_suppliers.push_back(std::move(block_supplier));
        }
        // Fatal: nothing below is meaningful if the merger failed to prepare.
        ASSERT_TRUE(merger->prepare(child_block_suppliers).ok());
    }
    {
        // Evaluates to 4 here: the full batches NULL, 1, 2 and 3 listed above.
        const int num_full_blocks = num_children * (num_round - 1) - 1;
        for (int block_idx = 0; block_idx < num_full_blocks; block_idx++) {
            Block block;
            bool eos = false;
            ASSERT_TRUE(merger->get_next(&block, &eos).ok());
            // Verify the shape before touching column 0 so that an empty block
            // is reported as a failure instead of being indexed.
            ASSERT_EQ(block.rows(), batch_size) << block_idx;
            auto expected_column =
                    block_idx == 0
                            ? ColumnHelper::create_nullable_column<DataTypeInt64>({0, 0, 0, 0, 0},
                                                                                  {1, 1, 1, 1, 1})
                            : ColumnHelper::create_nullable_column<DataTypeInt64>(
                                      {block_idx, block_idx, block_idx, block_idx, block_idx},
                                      {0, 0, 0, 0, 0});
            EXPECT_TRUE(ColumnHelper::column_equal(block.get_by_position(0).column,
                                                   expected_column))
                    << block_idx;
            EXPECT_FALSE(eos);
        }
        // The remaining five 4s are expected one row per block.
        for (int block_idx = 0; block_idx < num_children; block_idx++) {
            Block block;
            bool eos = false;
            ASSERT_TRUE(merger->get_next(&block, &eos).ok());
            ASSERT_EQ(block.rows(), 1);
            const auto& actual_column = block.get_by_position(0).column;
            auto expected_column = ColumnHelper::create_nullable_column<DataTypeInt64>({4}, {0});
            // On mismatch, print the first nested value that was actually produced.
            EXPECT_TRUE(ColumnHelper::column_equal(actual_column, expected_column))
                    << static_cast<const ColumnInt64*>(
                               static_cast<const ColumnNullable*>(actual_column.get())
                                       ->get_nested_column_ptr()
                                       .get())
                               ->get_data()[0];
            EXPECT_FALSE(eos);
        }
        // One more call must report end-of-stream with an empty block.
        Block block;
        bool eos = false;
        ASSERT_TRUE(merger->get_next(&block, &eos).ok());
        EXPECT_EQ(block.rows(), 0);
        EXPECT_TRUE(eos);
    }
}

TEST(SortMergerTest, NULL_LAST_DESC) {
    /**
     * Merge five identical runs, descending order with NULLs last.
     *
     * in: [4, 3, 2, 1, NULL], [4, 3, 2, 1, NULL], [4, 3, 2, 1, NULL], [4, 3, 2, 1, NULL], [4, 3, 2, 1, NULL]
     * out: [4, 4, 4, 4, 4], [3, 3, 3, 3, 3], [2, 2, 2, 2, 2], [1, 1, 1, 1, 1], [NULL], [NULL], [NULL], [NULL], [NULL]
     */
    const int num_children = 5;
    const int batch_size = 5;
    const int num_round = 2;
    // Per-child call counter, bumped by the supplier on every invocation.
    // Declared first so it outlives the merger that stores the suppliers.
    std::vector<int> round(num_children, 0);

    auto profile = std::make_shared<RuntimeProfile>("");
    auto ordering_expr = MockSlotRef::create_mock_contexts(
            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>()));

    // The merger is handed a raw pointer to `profile`, so it is declared after
    // it and therefore destroyed before it.
    std::unique_ptr<VSortedRunMerger> merger;
    {
        std::vector<bool> is_asc_order = {false};
        std::vector<bool> nulls_first = {false};
        const int limit = -1;
        const int offset = 0;
        merger = std::make_unique<VSortedRunMerger>(ordering_expr, is_asc_order, nulls_first,
                                                    batch_size, limit, offset, profile.get());
    }
    {
        std::vector<BlockSupplier> child_block_suppliers;
        child_block_suppliers.reserve(num_children);
        for (int child_idx = 0; child_idx < num_children; child_idx++) {
            // Capture only what the supplier needs; no catch-all by-reference
            // capture, because the callable is kept by the merger.
            BlockSupplier block_supplier = [round_vec = &round, num_round = num_round,
                                            id = child_idx](Block* block, bool* eos) {
                const int current_round = ++(*round_vec)[id];
                // The first call yields one block; the call on which the
                // counter reaches num_round reports end-of-stream instead.
                *eos = (current_round == num_round);
                if (*eos) {
                    return Status::OK();
                }
                *block = ColumnHelper::create_nullable_block<DataTypeInt64>(
                        {current_round + 3, current_round + 2, current_round + 1,
                         current_round + 0, 0},
                        {0, 0, 0, 0, 1});
                return Status::OK();
            };
            child_block_suppliers.push_back(std::move(block_supplier));
        }
        // Fatal: nothing below is meaningful if the merger failed to prepare.
        ASSERT_TRUE(merger->prepare(child_block_suppliers).ok());
    }
    {
        // Evaluates to 4 here: the full batches 4, 3, 2 and 1 listed above.
        const int num_full_blocks = num_children * (num_round - 1) - 1;
        for (int block_idx = 0; block_idx < num_full_blocks; block_idx++) {
            Block block;
            bool eos = false;
            ASSERT_TRUE(merger->get_next(&block, &eos).ok());
            // Verify the shape before touching column 0 so that an empty block
            // is reported as a failure instead of being indexed.
            ASSERT_EQ(block.rows(), batch_size) << block_idx;
            const int expected_value = 4 - block_idx;
            auto expected_column = ColumnHelper::create_nullable_column<DataTypeInt64>(
                    {expected_value, expected_value, expected_value, expected_value,
                     expected_value},
                    {0, 0, 0, 0, 0});
            EXPECT_TRUE(ColumnHelper::column_equal(block.get_by_position(0).column,
                                                   expected_column))
                    << block_idx;
            EXPECT_FALSE(eos);
        }
        // The five trailing NULLs are expected one row per block.
        for (int block_idx = 0; block_idx < num_children; block_idx++) {
            Block block;
            bool eos = false;
            ASSERT_TRUE(merger->get_next(&block, &eos).ok());
            ASSERT_EQ(block.rows(), 1);
            const auto& actual_column = block.get_by_position(0).column;
            auto expected_column = ColumnHelper::create_nullable_column<DataTypeInt64>({0}, {1});
            // On mismatch, print the first nested value that was actually produced.
            EXPECT_TRUE(ColumnHelper::column_equal(actual_column, expected_column))
                    << static_cast<const ColumnInt64*>(
                               static_cast<const ColumnNullable*>(actual_column.get())
                                       ->get_nested_column_ptr()
                                       .get())
                               ->get_data()[0];
            EXPECT_FALSE(eos);
        }
        // One more call must report end-of-stream with an empty block.
        Block block;
        bool eos = false;
        ASSERT_TRUE(merger->get_next(&block, &eos).ok());
        EXPECT_EQ(block.rows(), 0);
        EXPECT_TRUE(eos);
    }
}

TEST(SortMergerTest, TEST_LIMIT) {
    /**
     * Merge five identical runs with an offset and a limit applied.
     *
     * in: [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4], [NULL, 1, 2, 3, 4]
     * offset = 20, limit = 1
     * out: [4]
     */
    const int num_children = 5;
    const int batch_size = 5;
    const int num_round = 2;
    // Per-child call counter, bumped by the supplier on every invocation.
    // Declared first so it outlives the merger that stores the suppliers.
    std::vector<int> round(num_children, 0);

    auto profile = std::make_shared<RuntimeProfile>("");
    auto ordering_expr = MockSlotRef::create_mock_contexts(
            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>()));

    // The merger is handed a raw pointer to `profile`, so it is declared after
    // it and therefore destroyed before it.
    std::unique_ptr<VSortedRunMerger> merger;
    {
        std::vector<bool> is_asc_order = {true};
        std::vector<bool> nulls_first = {true};
        const int limit = 1;
        const int offset = 20;
        merger = std::make_unique<VSortedRunMerger>(ordering_expr, is_asc_order, nulls_first,
                                                    batch_size, limit, offset, profile.get());
    }
    {
        std::vector<BlockSupplier> child_block_suppliers;
        child_block_suppliers.reserve(num_children);
        for (int child_idx = 0; child_idx < num_children; child_idx++) {
            // Capture only what the supplier needs; no catch-all by-reference
            // capture, because the callable is kept by the merger.
            BlockSupplier block_supplier = [round_vec = &round, num_round = num_round,
                                            id = child_idx](Block* block, bool* eos) {
                const int current_round = ++(*round_vec)[id];
                // The first call yields one block; the call on which the
                // counter reaches num_round reports end-of-stream instead.
                *eos = (current_round == num_round);
                if (*eos) {
                    return Status::OK();
                }
                *block = ColumnHelper::create_nullable_block<DataTypeInt64>(
                        {0, current_round + 0, current_round + 1, current_round + 2,
                         current_round + 3},
                        {1, 0, 0, 0, 0});
                return Status::OK();
            };
            child_block_suppliers.push_back(std::move(block_supplier));
        }
        // Fatal: nothing below is meaningful if the merger failed to prepare.
        ASSERT_TRUE(merger->prepare(child_block_suppliers).ok());
    }
    {
        // Skipping 20 of the 25 merged rows leaves only 4s; limit = 1 keeps one.
        Block block;
        bool eos = false;
        ASSERT_TRUE(merger->get_next(&block, &eos).ok());
        // Verify the shape before touching column 0 so that an empty block is
        // reported as a failure instead of being indexed.
        ASSERT_EQ(block.rows(), 1);
        auto expected_column = ColumnHelper::create_nullable_column<DataTypeInt64>({4}, {0});
        EXPECT_TRUE(
                ColumnHelper::column_equal(block.get_by_position(0).column, expected_column));
        EXPECT_TRUE(eos);
    }
}

} // namespace doris::vectorized

0 comments on commit 2f660c2

Please sign in to comment.