diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp new file mode 100644 index 00000000000000..933b980c3a92a3 --- /dev/null +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "vec/aggregate_functions/aggregate_function_bitmap.h" +#include "vec/aggregate_functions/aggregate_function_simple_factory.h" + +namespace doris::vectorized { + +/* Chooses the aggregate implementation for the first argument's integer type (Int8/Int16/Int32/Int64), looking through a nullable wrapper first. Returns a raw owning pointer, or nullptr when the type is none of those four. */ template class AggregateFunctionTemplate> +static IAggregateFunction* createWithIntDataType(const DataTypes& argument_type) { + auto type = argument_type[0].get(); + if(type->isNullable()) { + type = assert_cast(type)->getNestedType().get(); + } + WhichDataType which(type); + if (which.idx == TypeIndex::Int8) + return new AggregateFunctionTemplate>(argument_type); + if (which.idx == TypeIndex::Int16) + return new AggregateFunctionTemplate>(argument_type); + if (which.idx == TypeIndex::Int32) + return new AggregateFunctionTemplate>(argument_type); + if (which.idx == TypeIndex::Int64) + return new AggregateFunctionTemplate>(argument_type); + return nullptr; +} + +/* Factory callback registered as "bitmap_union"; only argument_types is used, name and parameters are ignored. */ AggregateFunctionPtr createAggregateFunctionBitmapUnion(const std::string& name, + const DataTypes& argument_types, + const Array& parameters) { + return std::make_shared>(argument_types); +} + +/* Factory callback registered as "bitmap_intersect"; only argument_types is used, name and parameters are ignored. */ AggregateFunctionPtr createAggregateFunctionBitmapIntersect(const std::string& name, + const DataTypes& argument_types, + const Array& parameters) { + return std::make_shared>(argument_types); +} +/* Factory callback registered as "bitmap_union_count"; only argument_types is used, name and parameters are ignored. */ template +AggregateFunctionPtr createAggregateFunctionBitmapUnionCount(const std::string& name, + const DataTypes& argument_types, + const Array& parameters) { + return std::make_shared>(argument_types); +} + +/* Factory callback registered as "bitmap_union_int": takes ownership of the raw pointer produced by createWithIntDataType. NOTE(review): that helper returns nullptr for a non Int8/16/32/64 argument, so the returned shared_ptr can be null - confirm callers handle that. */ template +AggregateFunctionPtr createAggregateFunctionBitmapUnionInt(const std::string& name, + const DataTypes& argument_types, + const Array& parameters) { + + return std::shared_ptr(createWithIntDataType(argument_types)); +} + +/* Registers the bitmap aggregate functions with the factory. bitmap_union_count and bitmap_union_int are each registered twice, the second time with a trailing `true` argument (presumably the nullable variant - confirm against AggregateFunctionSimpleFactory::registerFunction). */ void registerAggregateFunctionBitmap(AggregateFunctionSimpleFactory& factory) { + factory.registerFunction("bitmap_union", createAggregateFunctionBitmapUnion); + factory.registerFunction("bitmap_intersect", createAggregateFunctionBitmapIntersect); + factory.registerFunction("bitmap_union_count", 
createAggregateFunctionBitmapUnionCount); + factory.registerFunction("bitmap_union_count", createAggregateFunctionBitmapUnionCount, true); + + factory.registerFunction("bitmap_union_int", createAggregateFunctionBitmapUnionInt); + factory.registerFunction("bitmap_union_int", createAggregateFunctionBitmapUnionInt, true); +} +} \ No newline at end of file diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h new file mode 100644 index 00000000000000..a18e16e2cbfb87 --- /dev/null +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h @@ -0,0 +1,166 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once +#include +#include + +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/columns/column_complex.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_bitmap.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/columns/column_nullable.h" +#include "vec/common/assert_cast.h" +#include "vec/io/io_helper.h" + +namespace doris::vectorized { + +/* Union policy: a scalar value is inserted with BitmapValue::add, a bitmap is OR-ed into the accumulator, and merging two states is also a bitwise OR. */ struct AggregateFunctionBitmapUnionOp { + static constexpr auto name = "bitmap_union"; + + template + static void add(BitmapValue& res, const T& data) { + res.add(data); + } + + static void add(BitmapValue& res, const BitmapValue& data) { res |= data; } + + static void merge(BitmapValue& res, const BitmapValue& data) { res |= data; } +}; + +/* Intersection policy: both add and merge AND the incoming bitmap into the accumulator. NOTE(review): AggregateFunctionBitmapData holds a default-constructed BitmapValue; if that starts out empty, the first &= leaves the result empty - confirm how the initial state is meant to be seeded. */ struct AggregateFunctionBitmapIntersectOp { + static constexpr auto name = "bitmap_intersect"; + static void add(BitmapValue& res, const BitmapValue& data) { res &= data; } + + static void merge(BitmapValue& res, const BitmapValue& data) { res &= data; } +}; + +/* Aggregation state: a single BitmapValue. add/merge are delegated to Op; write/read are delegated to DataTypeBitMap's stream helpers. */ template +struct AggregateFunctionBitmapData { + BitmapValue value; + + template + void add(const T& data) { Op::add(value, data); } + + void merge(const BitmapValue& data) { Op::merge(value, data); } + + void write(std::ostream& buf) const { DataTypeBitMap::serializeAsStream(value, buf); } + + void read(std::istream& buf) { DataTypeBitMap::deserializeAsStream(value, buf); } + + BitmapValue& get() { return value; } +}; + +/* Aggregate function that folds the rows of its first argument column into a bitmap using Op and appends that bitmap to the result column; its reported name is Op::name. ColVecType and ColVecResult are both ColumnBitmap. */ template +class AggregateFunctionBitmapOp final + : public IAggregateFunctionDataHelper, + AggregateFunctionBitmapOp> { +public: + using ResultDataType = BitmapValue; + using ColVecType = ColumnBitmap; + using ColVecResult = ColumnBitmap; + + String getName() const override { return Op::name; } + + AggregateFunctionBitmapOp(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper, + AggregateFunctionBitmapOp>(argument_types_, {}) {} + + DataTypePtr getReturnType() const override { return std::make_shared(); } + + void 
add(AggregateDataPtr place, const IColumn** columns, size_t row_num, + Arena*) const override { + const auto& column = static_cast(*columns[0]); + this->data(place).add(column.getData()[row_num]); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override { + /* const_cast is needed because get() is a non-const member; the value is then passed to merge() as a const reference. */ this->data(place).merge( + const_cast&>(this->data(rhs)).get()); + } + + void serialize(ConstAggregateDataPtr place, std::ostream& buf) const override { + this->data(place).write(buf); + } + + void deserialize(AggregateDataPtr place, std::istream& buf, Arena*) const override { + this->data(place).read(buf); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn& to) const override { + auto& column = static_cast(to); + column.getData().push_back( + const_cast&>(this->data(place)).get()); + } + + const char* getHeaderFilePath() const override { return __FILE__; } +}; + +/* Aggregate function that accumulates rows into a bitmap state and emits the bitmap's cardinality. When the `nullable` flag is set, add() unwraps the nullable column and skips rows that are NULL. NOTE(review): getName() returns the literal "count" - confirm this is the intended reported name. */ template +class AggregateFunctionBitmapCount final : public IAggregateFunctionDataHelper< + AggregateFunctionBitmapData, + AggregateFunctionBitmapCount> { +public: + // using ColVecType = ColumnBitmap; + using ColVecResult = ColumnVector; + using AggFunctionData = AggregateFunctionBitmapData; + + AggregateFunctionBitmapCount(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper, + AggregateFunctionBitmapCount>(argument_types_, {}) {} + + String getName() const override { return "count"; } + DataTypePtr getReturnType() const override { return std::make_shared(); } + + void add(AggregateDataPtr place, const IColumn** columns, size_t row_num, + Arena*) const override { + if constexpr (nullable) { + auto& nullable_column = assert_cast(*columns[0]); + if (!nullable_column.isNullAt(row_num)) { + const auto& column = static_cast(nullable_column.getNestedColumn()); + this->data(place).add(column.getData()[row_num]); + } + } else { + const auto& column = static_cast(*columns[0]); + this->data(place).add(column.getData()[row_num]); + } + } + + void merge(AggregateDataPtr place, 
ConstAggregateDataPtr rhs, Arena*) const override { + this->data(place).merge( + const_cast(this->data(rhs)).get()); + } + + void serialize(ConstAggregateDataPtr place, std::ostream& buf) const override { + this->data(place).write(buf); + } + + void deserialize(AggregateDataPtr place, std::istream& buf, Arena*) const override { + this->data(place).read(buf); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn& to) const override { + /* The aggregate result is the accumulated bitmap's cardinality, not the bitmap itself. */ auto & value_data = const_cast(this->data(place)).get(); + auto& column = static_cast(to); + column.getData().push_back(value_data.cardinality()); + } + + const char* getHeaderFilePath() const override { return __FILE__; } +}; + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 2ab25022bb2ea8..f730b1ded865f7 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -72,6 +72,15 @@ class Block { void erase(const std::set& positions); /// remove the column with the specified name void erase(const String& name); + // T was std::set, std::vector, std::list + /* Rebuilds data so that it holds only the columns at the given positions, in the container's iteration order. Each data[pos] is moved from, so a position listed twice would read an already moved-from element. NOTE(review): only data is replaced here - confirm whether any name-to-position index kept by Block also needs rebuilding. */ template + void erase_not_in(const T& container) { + Container new_data; + for(auto pos: container) { + new_data.emplace_back(std::move(data[pos])); + } + std::swap(data, new_data); + } /// References are invalidated after calling functions above. 
diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp index 310e160f75b5f5..ee3353ab5f28cd 100644 --- a/be/src/vec/exec/vunion_node.cpp +++ b/be/src/vec/exec/vunion_node.cpp @@ -117,10 +117,12 @@ Status VUnionNode::get_next_const(RuntimeState* state, Block* block) { MutableBlock mblock; for (; _const_expr_list_idx < _const_expr_lists.size(); ++_const_expr_list_idx) { Block tmp_block; - for (size_t i = 0; i < _const_expr_lists[_const_expr_list_idx].size(); ++i) { - int result_column_num = -1; - _const_expr_lists[_const_expr_list_idx][i]->execute(&tmp_block, &result_column_num); + int const_expr_lists_size = _const_expr_lists[_const_expr_list_idx].size(); + std::vector result_list(const_expr_lists_size); + for (size_t i = 0; i < const_expr_lists_size; ++i) { + _const_expr_lists[_const_expr_list_idx][i]->execute(&tmp_block, &result_list[i]); } + /* Keep only the columns at the positions recorded in result_list (presumably the result column index written by each execute() call - confirm). */ tmp_block.erase_not_in(result_list); mblock.merge(tmp_block); } block->swap(mblock.to_block()); diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 08509bd134bd0c..51f823fe0b2443 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -17,9 +17,23 @@ #include "util/string_parser.hpp" #include "vec/functions/function_totype.h" +#include "vec/functions/function_const.h" #include "vec/functions/simple_function_factory.h" +#include "gutil/strings/split.h" namespace doris::vectorized { + +/* Impl for the bitmap_empty function: the constant value is a default-constructed BitmapValue and the result column type is ColumnBitmap. */ struct BitmapEmpty { + static constexpr auto name = "bitmap_empty"; + using ReturnColVec = ColumnBitmap; + static DataTypePtr get_return_type() { + return std::make_shared(); + } + static auto init_value() { + return BitmapValue{}; + } +}; + struct NameToBitmap { static constexpr auto name = "to_bitmap"; }; @@ -54,6 +68,60 @@ struct ToBitmapImpl { } }; +struct NameBitmapFromString { + static constexpr auto name = "bitmap_from_string"; +}; + +/* Parses each input string as a comma-separated list of unsigned 64-bit integers (safe_strtou64) and appends a bitmap built from them; a string that fails to parse yields a default-constructed bitmap instead. NOTE(review): the loop reads offsets[i - 1] with i == 0, which relies on the offsets container permitting index -1, and str_size subtracts 1 (presumably a per-string terminator byte) - confirm both against ColumnString. */ struct BitmapFromString { + using ReturnType = DataTypeBitMap; + static constexpr 
auto TYPE_INDEX = TypeIndex::String; + using Type = String; + using ReturnColumnType = ColumnBitmap; + static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, + std::vector& res) { + auto size = offsets.size(); + res.reserve(size); + std::vector bits; + for (int i = 0; i < size; ++i) { + const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); + int str_size = offsets[i] - offsets[i - 1] - 1; + if (SplitStringAndParse({raw_str, str_size}, + ",", &safe_strtou64, &bits)) { + res.emplace_back(bits); + } else { + res.emplace_back(); + } + /* bits is reused across rows, so it is emptied after every row whether or not parsing succeeded. */ bits.clear(); + } + return Status::OK(); + } +}; + +struct NameBitmapHash { + static constexpr auto name = "bitmap_hash"; +}; + +/* Hashes each input string with HashUtil::murmur_hash3_32 (seed HashUtil::MURMUR3_32_SEED) and appends a bitmap containing only that 32-bit hash value. NOTE(review): reads offsets[i - 1] with i == 0 and subtracts 1 from the length - confirm both against ColumnString. */ struct BitmapHash { + using ReturnType = DataTypeBitMap; + static constexpr auto TYPE_INDEX = TypeIndex::String; + using Type = String; + using ReturnColumnType = ColumnBitmap; + static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, + std::vector& res) { + auto size = offsets.size(); + res.reserve(size); + for (int i = 0; i < size; ++i) { + const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); + int str_size = offsets[i] - offsets[i - 1] - 1; + uint32_t hash_value = + HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED); + res.emplace_back(); + res.back().add(hash_value); + } + return Status::OK(); + } +}; + struct NameBitmapCount { static constexpr auto name = "bitmap_count"; }; @@ -204,7 +272,11 @@ struct BitmapHasAny { } }; +using FunctionBitmapEmpty = FunctionConst; using FunctionToBitmap = FunctionUnaryToType; +using FunctionBitmapFromString = FunctionUnaryToType; +using FunctionBitmapHash = FunctionUnaryToType; + using FunctionBitmapCount = FunctionUnaryToType; using FunctionBitmapAnd = @@ -223,7 +295,10 @@ using FunctionBitmapHasAny = FunctionBinaryToType; void registerFunctionBitmap(SimpleFunctionFactory& factory) { + factory.registerFunction(); factory.registerFunction(); + 
factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/be/src/vec/functions/function_const.h b/be/src/vec/functions/function_const.h index 73a092919c74fd..37d2585bcdd5b1 100644 --- a/be/src/vec/functions/function_const.h +++ b/be/src/vec/functions/function_const.h @@ -3,17 +3,18 @@ #include "vec/columns/columns_number.h" #include "vec/data_types/data_type_number.h" #include "vec/functions/function.h" +#include "vec/columns/column_const.h" namespace doris::vectorized { -template +template class FunctionConst : public IFunction { public: static constexpr auto name = Impl::name; static FunctionPtr create() { return std::make_shared(); } -private: +public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -30,4 +31,26 @@ class FunctionConst : public IFunction { } }; +/* Zero-argument constant function for Impls that provide get_return_type(), ReturnColVec and init_value(): executeImpl stores Impl::init_value() in a one-element ReturnColVec and wraps it in a ColumnConst. NOTE(review): the ColumnConst is created with size 1 and input_rows_count is not used - confirm the result should not be sized to input_rows_count. */ template +class FunctionConst: public IFunction { +public: + static constexpr auto name = Impl::name; + static FunctionPtr create() { return std::make_shared(); } + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes& /*arguments*/) const override { + return Impl::get_return_type(); + } + + Status executeImpl(Block& block, const ColumnNumbers&, size_t result, + size_t input_rows_count) override { + auto column = Impl::ReturnColVec::create(); + column->getData().emplace_back(Impl::init_value()); + block.getByPosition(result).column = ColumnConst::create(std::move(column), 1); + return Status::OK(); + } +}; + } // namespace doris::vectorized diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfoBase.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfoBase.java index 9102c57d995a30..b3d8a2e2684310 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfoBase.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfoBase.java @@ -153,6 +153,7 @@ private TupleDescriptor createTupleDesc(Analyzer analyzer, boolean isOutputTuple if (aggExpr.getFnName().getFunction().equals(FunctionSet.COUNT) || aggExpr.getFnName().getFunction().equals("ndv") || aggExpr.getFnName().getFunction().equals(FunctionSet.BITMAP_UNION_INT) + || aggExpr.getFnName().getFunction().equals(FunctionSet.BITMAP_UNION_COUNT) || aggExpr.getFnName().getFunction().equals("ndv_no_finalize")) { // TODO: Consider making nullability a property of types or of builtin agg fns. // row_number(), rank(), and dense_rank() are non-nullable as well. diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 178171065c94f2..e04f7aed12a5dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1381,6 +1381,16 @@ private void initAggregateBuiltins() { "_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE", "_ZN5doris15BitmapFunctions15bitmap_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE", true, false, true)); + // vectorized + addBuiltin(AggregateFunction.createBuiltin(BITMAP_UNION_INT, + Lists.newArrayList(t), Type.BIGINT, t, + "", + BITMAP_UNION_INT_SYMBOL.get(t), + "", + "", + "", + true, false, true, true)); + // INTERSECT_COUNT addBuiltin(AggregateFunction.createBuiltin(INTERSECT_COUNT, diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 56c61fe6c86dc5..68c85237a23c8f 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -803,11 +803,11 @@ [['to_bitmap'], 'BITMAP', ['VARCHAR'], '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE', 'vec'], [['bitmap_hash'], 'BITMAP', ['VARCHAR'], - 
'_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE'], + '_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE', 'vec'], [['bitmap_count'], 'BIGINT', ['BITMAP'], '_ZN5doris15BitmapFunctions12bitmap_countEPN9doris_udf15FunctionContextERKNS1_9StringValE', 'vec'], [['bitmap_empty'], 'BITMAP', [], - '_ZN5doris15BitmapFunctions12bitmap_emptyEPN9doris_udf15FunctionContextE'], + '_ZN5doris15BitmapFunctions12bitmap_emptyEPN9doris_udf15FunctionContextE', 'vec'], [['bitmap_or'], 'BITMAP', ['BITMAP','BITMAP'], '_ZN5doris15BitmapFunctions9bitmap_orEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', 'vec'], [['bitmap_xor'], 'BITMAP', ['BITMAP','BITMAP'],