From 80f78057965ff96eac8e326915f0bf0851d5b9e2 Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Wed, 26 Jun 2024 21:22:55 +0800 Subject: [PATCH] upd --- be/src/vec/functions/function_string.h | 139 ++++++++++++++++--------- 1 file changed, 92 insertions(+), 47 deletions(-) diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index bc15803fe4589ea..9d71762e7866c61 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "common/compiler_util.h" // IWYU pragma: keep @@ -70,6 +71,7 @@ #include "vec/data_types/data_type.h" #include "vec/functions/round.h" #include "vec/io/io_helper.h" +#include "vec/utils/template_helpers.hpp" #ifndef USE_LIBCPP #include @@ -2277,72 +2279,115 @@ class FunctionStringParseUrl : public IFunction { res_offsets.resize(input_rows_count); size_t argument_size = arguments.size(); - bool has_key = argument_size >= 3; + const bool has_key = argument_size >= 3; std::vector argument_columns(argument_size); + std::vector col_const(argument_size); for (size_t i = 0; i < argument_size; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - } - - const auto* url_col = check_and_get_column(argument_columns[0].get()); - const auto* part_col = check_and_get_column(argument_columns[1].get()); - const ColumnString* key_col = nullptr; - if (has_key) { - key_col = check_and_get_column(argument_columns[2].get()); - } - - if (!url_col || !part_col || (has_key && !key_col)) { - return Status::InternalError("Not supported input arguments types"); + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); } - for (size_t i = 0; i < input_rows_count; ++i) { - if (null_map_data[i]) { - StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); - continue; - } - - auto part = part_col->get_data_at(i); - StringRef p(const_cast(part.data), part.size); - UrlParser::UrlPart url_part = UrlParser::get_url_part(p); - StringRef url_key; - if (has_key) { - auto key = key_col->get_data_at(i); - url_key = StringRef(const_cast(key.data), key.size); - } - - auto source = url_col->get_data_at(i); - StringRef url_val(const_cast(source.data), source.size); - - StringRef parse_res; - bool success = false; - if (has_key) { - success = UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res); - } else { - success = UrlParser::parse_url(url_val, url_part, &parse_res); - } - - if (!success) { - // url is malformed, or url_part is invalid. + const auto* url_col = assert_cast(argument_columns[0].get()); + const auto* part_col = assert_cast(argument_columns[1].get()); + const bool part_const = col_const[1]; + std::vector url_parts; + if (part_const) { + StringRef part = part_col->get_data_at(0); + UrlParser::UrlPart url_part = UrlParser::get_url_part(part); + if (url_part == UrlParser::INVALID) { + return Status::RuntimeError("Invalid URL part: {}\n{}", + std::string(part.data, part.size), + "(Valid URL parts are 'PROTOCOL', 'HOST', " + "'PATH', 'REF', 'AUTHORITY', " + "'FILE', 'USERINFO', 'PORT' and 'QUERY')"); + } + url_parts.push_back(url_part); + } else { + url_parts.resize(input_rows_count); + for (int i = 0; i < input_rows_count; i++) { + StringRef part = part_col->get_data_at(i); + UrlParser::UrlPart url_part = UrlParser::get_url_part(part); if (url_part == UrlParser::INVALID) { return Status::RuntimeError("Invalid URL part: {}\n{}", std::string(part.data, part.size), "(Valid URL parts are 'PROTOCOL', 'HOST', " "'PATH', 'REF', 'AUTHORITY', " "'FILE', 'USERINFO', 'PORT' and 'QUERY')"); - } else { - StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); - continue; } + url_parts[i] = url_part; } + } - StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, - res_chars, res_offsets); + if (has_key) { + const bool url_const = col_const[0]; + const bool key_const = col_const[2]; + const auto* key_col = assert_cast(argument_columns[2].get()); + RETURN_IF_ERROR(std::visit( + [&](auto url_const, auto part_const, auto key_const) { + return vector_parse_key( + url_col, url_parts, key_col, input_rows_count, null_map_data, + res_chars, res_offsets); + }, + vectorized::make_bool_variant(url_const), + vectorized::make_bool_variant(part_const), + vectorized::make_bool_variant(key_const))); + } else { + const bool url_const = col_const[0]; + RETURN_IF_ERROR(std::visit( + [&](auto url_const, auto part_const) { + return vector_parse(url_col, url_parts, + input_rows_count, null_map_data, + res_chars, res_offsets); + }, + vectorized::make_bool_variant(url_const), + vectorized::make_bool_variant(part_const))); } block.get_by_position(result).column = ColumnNullable::create(std::move(res), std::move(null_map)); return Status::OK(); } + template + static Status vector_parse(const ColumnString* url_col, + std::vector& url_parts, const int size, + ColumnUInt8::Container& null_map_data, + ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { + for (size_t i = 0; i < size; ++i) { + UrlParser::UrlPart& url_part = url_parts[part_const ? 0 : i]; + StringRef url_val = url_col->get_data_at(url_const ? 0 : i); + StringRef parse_res; + if (UrlParser::parse_url(url_val, url_part, &parse_res)) { + StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, + res_chars, res_offsets); + } else { + StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); + continue; + } + } + return Status::OK(); + } + template + static Status vector_parse_key(const ColumnString* url_col, + std::vector& url_parts, + const ColumnString* key_col, const int size, + ColumnUInt8::Container& null_map_data, + ColumnString::Chars& res_chars, + ColumnString::Offsets& res_offsets) { + for (size_t i = 0; i < size; ++i) { + UrlParser::UrlPart& url_part = url_parts[part_const ? 0 : i]; + StringRef url_val = url_col->get_data_at(url_const ? 0 : i); + StringRef url_key = key_col->get_data_at(key_const ? 0 : i); + StringRef parse_res; + if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { + StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, + res_chars, res_offsets); + } else { + StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); + continue; + } + } + return Status::OK(); + } }; class FunctionUrlDecode : public IFunction {