Skip to content

Commit

Permalink
[Feature](json_functions) support json-keys (#36411)
Browse files Browse the repository at this point in the history
  • Loading branch information
amorynan authored Jun 26, 2024
1 parent ce0954f commit 309df53
Show file tree
Hide file tree
Showing 9 changed files with 563 additions and 0 deletions.
174 changes: 174 additions & 0 deletions be/src/vec/functions/function_jsonb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,177 @@ class FunctionJsonbExtract : public IFunction {
}
};

/// Implements json_keys(json_doc[, path]) (alias: jsonb_keys).
///
/// The result type is Nullable(Array(Nullable(String))). For every row the
/// function parses the binary JSONB value, optionally seeks to `path`, and
/// emits the keys of the object found there.
///
/// A row is NULL when:
///   - the json argument or the path argument is NULL,
///   - the json value is empty,
///   - the selected value does not exist or is not an object.
/// An InvalidArgument status is returned when the JSONB bytes cannot be parsed
/// or the path expression is malformed.
class FunctionJsonbKeys : public IFunction {
public:
    static constexpr auto name = "json_keys";
    static constexpr auto alias = "jsonb_keys";
    static FunctionPtr create() { return std::make_shared<FunctionJsonbKeys>(); }
    String get_name() const override { return name; }
    // The function accepts either (json) or (json, path), so it is registered as variadic.
    bool is_variadic() const override { return true; }
    size_t get_number_of_arguments() const override { return 0; }
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return make_nullable(
                std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>())));
    }

    /// Evaluates json_keys for all rows of `block`.
    ///
    /// @param arguments        positions of the json column and, optionally, the path column
    /// @param result           position of the column that receives the result
    /// @param input_rows_count number of rows to produce
    /// @return Status::OK() on success, InvalidArgument on a bad argument count,
    ///         an unexpected column type, an unparsable JSONB value or an invalid path.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) const override {
        DCHECK_GE(arguments.size(), 1);
        if (arguments.size() != 1 && arguments.size() != 2) {
            // the caller passed an unsupported number of arguments
            return Status::InvalidArgument("json_keys should have 1 or 2 arguments");
        }

        // Prepare the jsonb data column. The const flag must be kept: a constant
        // column holds a single row and must not be indexed with the row number.
        ColumnPtr jsonb_data_column = nullptr;
        bool data_const = false;
        const NullMap* data_null_map = nullptr;
        std::tie(jsonb_data_column, data_const) =
                unpack_if_const(block.get_by_position(arguments[0]).column);
        if (jsonb_data_column->is_nullable()) {
            const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column);
            jsonb_data_column = nullable->get_nested_column_ptr();
            data_null_map = &nullable->get_null_map_data();
        }
        const ColumnString* col_from_string = check_and_get_column<ColumnString>(jsonb_data_column);
        if (col_from_string == nullptr) {
            return Status::InvalidArgument("json_keys expects a string column as json argument");
        }

        // Prepare the path column; it is absent for the one-argument form.
        ColumnPtr jsonb_path_column = nullptr;
        const ColumnString* jsonb_path_col = nullptr;
        bool path_const = false;
        const NullMap* path_null_map = nullptr;
        if (arguments.size() == 2) {
            // the path must be backed by a ColumnString (possibly nullable and/or const)
            std::tie(jsonb_path_column, path_const) =
                    unpack_if_const(block.get_by_position(arguments[1]).column);
            if (jsonb_path_column->is_nullable()) {
                const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_path_column);
                jsonb_path_column = nullable->get_nested_column_ptr();
                path_null_map = &nullable->get_null_map_data();
            }
            jsonb_path_col = check_and_get_column<ColumnString>(jsonb_path_column);
            if (jsonb_path_col == nullptr) {
                return Status::InvalidArgument(
                        "json_keys expects a string column as path argument");
            }
        }

        // Outer null map: one flag per result row, initially "not null".
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
        NullMap& res_null_map = null_map->get_data();

        // Result array column: Array(Nullable(String)).
        auto dst_arr = ColumnArray::create(
                ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()),
                ColumnArray::ColumnOffsets::create());
        ColumnNullable& dst_nested_column = assert_cast<ColumnNullable&>(dst_arr->get_data());

        // Pick the specialization matching (has path?, path is const?).
        Status st;
        if (jsonb_path_column) {
            if (path_const) {
                st = inner_loop_impl<true, true>(input_rows_count, *dst_arr, dst_nested_column,
                                                 res_null_map, *col_from_string, data_null_map,
                                                 jsonb_path_col, path_null_map, data_const);
            } else {
                st = inner_loop_impl<true, false>(input_rows_count, *dst_arr, dst_nested_column,
                                                  res_null_map, *col_from_string, data_null_map,
                                                  jsonb_path_col, path_null_map, data_const);
            }
        } else {
            st = inner_loop_impl<false, false>(input_rows_count, *dst_arr, dst_nested_column,
                                               res_null_map, *col_from_string, data_null_map,
                                               jsonb_path_col, path_null_map, data_const);
        }
        if (!st.ok()) {
            return st;
        }
        block.get_by_position(result).column =
                ColumnNullable::create(std::move(dst_arr), std::move(null_map));
        return st;
    }

private:
    /// Marks result row `row` as NULL and appends a default (empty) array entry for it.
    static ALWAYS_INLINE void insert_null_row(size_t row, ColumnArray& dst_arr,
                                              NullMap& res_null_map) {
        res_null_map[row] = 1;
        dst_arr.insert_default();
    }

    /// Row loop of json_keys.
    ///
    /// @tparam JSONB_PATH_PARAM true when a path argument was supplied
    /// @tparam JSON_PATH_CONST  true when that path argument is a constant column
    /// @param jsonb_data_nullmap null map of the json argument, or nullptr if not nullable
    /// @param jsonb_path_column  path strings, or nullptr when no path was supplied
    /// @param path_null_map      null map of the path argument, or nullptr if not nullable
    /// @param jsonb_data_const   true when the json argument is a constant (single-row) column
    template <bool JSONB_PATH_PARAM, bool JSON_PATH_CONST>
    static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, ColumnArray& dst_arr,
                                                ColumnNullable& dst_nested_column,
                                                NullMap& res_null_map,
                                                const ColumnString& col_from_string,
                                                const NullMap* jsonb_data_nullmap,
                                                const ColumnString* jsonb_path_column,
                                                const NullMap* path_null_map,
                                                bool jsonb_data_const = false) {
        // If the path is const we only need to parse it once.
        JsonbPath const_path;
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
            if (path_null_map && (*path_null_map)[0]) {
                // A constant NULL path makes every row NULL; do not try to parse it.
                for (size_t i = 0; i < input_rows_count; ++i) {
                    insert_null_row(i, dst_arr, res_null_map);
                }
                return Status::OK();
            }
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
                return Status::InvalidArgument(
                        "Json path error: {} for value: {}",
                        JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
                        r_raw_ref.to_string());
            }
        }
        const auto& ldata = col_from_string.get_chars();
        const auto& loffsets = col_from_string.get_offsets();
        for (size_t i = 0; i < input_rows_count; ++i) {
            // A constant json argument has exactly one row; always read row 0 for it.
            const size_t data_row = jsonb_data_const ? 0 : i;

            // If the jsonb data or the path is NULL the result row is NULL.
            // The null maps are pointers: dereference them before indexing.
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[data_row]) {
                insert_null_row(i, dst_arr, res_null_map);
                continue;
            }
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
                if (path_null_map && (*path_null_map)[i]) {
                    insert_null_row(i, dst_arr, res_null_map);
                    continue;
                }
            }

            // Locate the raw JSONB bytes of this row.
            // NOTE(review): for the first row this reads offsets[-1]; presumably the offsets
            // array is left-padded so that this yields 0 - confirm against PaddedPODArray.
            size_t l_off = loffsets[data_row - 1];
            size_t l_size = loffsets[data_row] - l_off;
            if (l_size == 0) {
                insert_null_row(i, dst_arr, res_null_map);
                continue;
            }
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
            JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
            if (UNLIKELY(!doc || !doc->getValue())) {
                dst_arr.clear();
                return Status::InvalidArgument("jsonb data is invalid");
            }

            // Select the value whose keys are requested.
            JsonbValue* obj_val = nullptr;
            if constexpr (JSONB_PATH_PARAM) {
                if constexpr (!JSON_PATH_CONST) {
                    const ColumnString::Chars& rdata = jsonb_path_column->get_chars();
                    const ColumnString::Offsets& roffsets = jsonb_path_column->get_offsets();
                    size_t r_off = roffsets[i - 1];
                    size_t r_size = roffsets[i] - r_off;
                    const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
                    JsonbPath path;
                    if (!path.seek(r_raw, r_size)) {
                        // Report only the offending path, not the whole chars buffer.
                        return Status::InvalidArgument(
                                "Json path error: {} for value: {}",
                                JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
                                std::string_view(r_raw, r_size));
                    }
                    obj_val = doc->getValue()->findValue(path, nullptr);
                } else {
                    obj_val = doc->getValue()->findValue(const_path, nullptr);
                }
            } else {
                obj_val = doc->getValue();
            }

            if (!obj_val || !obj_val->isObject()) {
                // Missing value or not an object: the result row is NULL.
                insert_null_row(i, dst_arr, res_null_map);
                continue;
            }

            // Append every key of the object, then close the array for this row.
            auto* obj = static_cast<ObjectVal*>(obj_val);
            for (auto it = obj->begin(); it != obj->end(); ++it) {
                dst_nested_column.insert_data(it->getKeyStr(), it->klen());
            }
            dst_arr.get_offsets().push_back(dst_nested_column.size());
        } // for each row
        return Status::OK();
    }
};

class FunctionJsonbExtractPath : public IFunction {
public:
static constexpr auto name = "json_exists_path";
Expand Down Expand Up @@ -1468,6 +1639,9 @@ void register_function_jsonb(SimpleFunctionFactory& factory) {
factory.register_function<FunctionJsonbType>();
factory.register_alias(FunctionJsonbType::name, FunctionJsonbType::alias);

factory.register_function<FunctionJsonbKeys>();
factory.register_alias(FunctionJsonbKeys::name, FunctionJsonbKeys::alias);

factory.register_function<FunctionJsonbExtractIsnull>();
factory.register_alias(FunctionJsonbExtractIsnull::name, FunctionJsonbExtractIsnull::alias);
factory.register_function<FunctionJsonbExtractBool>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonContains;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonExtract;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonInsert;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonKeys;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonLength;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonObject;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonQuote;
Expand Down Expand Up @@ -715,6 +716,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
scalar(JsonbType.class, "jsonb_type"),
scalar(JsonLength.class, "json_length"),
scalar(JsonContains.class, "json_contains"),
scalar(JsonKeys.class, "json_keys", "jsonb_keys"),
scalar(L1Distance.class, "l1_distance"),
scalar(L2Distance.class, "l2_distance"),
scalar(LastDay.class, "last_day"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.functions.scalar;

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.JsonType;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.types.VarcharType;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.util.List;

/**
 * Scalar function {@code json_keys} (also registered as {@code jsonb_keys}).
 *
 * <p>Given a JSON document, yields the keys of its top-level object as an array of strings.
 * When a second, path argument is supplied, the keys of the object located at that path
 * are returned instead.
 *
 * <p>The result is NULL if any argument is NULL, if the document is not an object, or if
 * the path does not lead to an object. An invalid JSON document or a malformed path
 * expression is reported as an error.
 */
public class JsonKeys extends ScalarFunction
        implements ExplicitlyCastableSignature, AlwaysNullable {

    /** Supported signatures: {@code (JSON, VARCHAR)} and {@code (JSON)}, both returning ARRAY of STRING. */
    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
            FunctionSignature.ret(ArrayType.of(StringType.INSTANCE))
                    .args(JsonType.INSTANCE, VarcharType.SYSTEM_DEFAULT),
            FunctionSignature.ret(ArrayType.of(StringType.INSTANCE))
                    .args(JsonType.INSTANCE));

    /**
     * Builds {@code json_keys(doc)}.
     *
     * @param arg0 the JSON document expression
     */
    public JsonKeys(Expression arg0) {
        super("json_keys", arg0);
    }

    /**
     * Builds {@code json_keys(doc, path)}.
     *
     * @param arg0 the JSON document expression
     * @param arg1 the path expression selecting the object whose keys are returned
     */
    public JsonKeys(Expression arg0, Expression arg1) {
        super("json_keys", arg0, arg1);
    }

    /**
     * Re-creates this function over a new child list.
     *
     * @param children one (document) or two (document, path) expressions
     * @throws IllegalArgumentException if {@code children} has any other size
     */
    @Override
    public JsonKeys withChildren(List<Expression> children) {
        final int arity = children.size();
        Preconditions.checkArgument(arity == 1 || arity == 2);
        return arity == 1
                ? new JsonKeys(children.get(0))
                : new JsonKeys(children.get(0), children.get(1));
    }

    @Override
    public List<FunctionSignature> getSignatures() {
        return SIGNATURES;
    }

    @Override
    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
        return visitor.visitJsonKeys(this, context);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonContains;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonExtract;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonInsert;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonKeys;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonLength;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonObject;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonQuote;
Expand Down Expand Up @@ -1320,6 +1321,10 @@ default R visitJsonExtract(JsonExtract jsonExtract, C context) {
return visitScalarFunction(jsonExtract, context);
}

/**
 * Visits a {@link JsonKeys} expression. The default implementation does no
 * JsonKeys-specific work and simply forwards to {@code visitScalarFunction}.
 *
 * @param jsonKeys the expression being visited
 * @param context the visitor context, passed through unchanged
 * @return whatever {@code visitScalarFunction} returns for this expression
 */
default R visitJsonKeys(JsonKeys jsonKeys, C context) {
return visitScalarFunction(jsonKeys, context);
}

default R visitJsonInsert(JsonInsert jsonInsert, C context) {
return visitScalarFunction(jsonInsert, context);
}
Expand Down
5 changes: 5 additions & 0 deletions gensrc/script/doris_builtins_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1777,6 +1777,11 @@
[['jsonb_type'], 'STRING', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['jsonb_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],

[['jsonb_keys'], 'ARRAY_STRING', ['JSONB'], 'ALWAYS_NULLABLE'],
[['jsonb_keys'], 'ARRAY_STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['json_keys'], 'ARRAY_STRING', ['JSONB'], 'ALWAYS_NULLABLE'],
[['json_keys'], 'ARRAY_STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],

[['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR', '...'], 'ALWAYS_NULLABLE'],
[['jsonb_extract'], 'JSONB', ['JSONB', 'STRING', '...'], 'ALWAYS_NULLABLE'],
[['jsonb_extract_isnull'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
Expand Down
Loading

0 comments on commit 309df53

Please sign in to comment.