Skip to content

Commit

Permalink
[Feature](JsonReader) support specifying $. as root column in json pa…
Browse files Browse the repository at this point in the history
…th (#38213)

```
curl --location-trusted -u root:  -T value.json -H "read_json_by_line:true" -H "format:json"  -H "max_filter_ratio:0.5" -H "jsonpaths: [\"$.id\", \"$.entity_id\", \"$.\"]" http://127.0.0.1:8149/api/regression_test/records/_stream_load
```
  • Loading branch information
eldenmoon authored Jul 29, 2024
1 parent 73519c5 commit 224e25e
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 1 deletion.
5 changes: 5 additions & 0 deletions be/src/exprs/json_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,4 +353,9 @@ void JsonFunctions::merge_objects(rapidjson::Value& dst_object, rapidjson::Value
}
}

// root path "$."
bool JsonFunctions::is_root_path(const std::vector<JsonPath>& json_path) {
return json_path.size() == 2 && json_path[0].key == "$" && json_path[1].key.empty();
}

} // namespace doris
2 changes: 2 additions & 0 deletions be/src/exprs/json_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ class JsonFunctions {

static std::string print_json_value(const rapidjson::Value& value);

// Returns true when json_path is the root path "$.": exactly two components,
// the first with key "$" and the second with an empty key.
static bool is_root_path(const std::vector<JsonPath>& json_path);

private:
static rapidjson::Value* match_value(const std::vector<JsonPath>& parsed_paths,
rapidjson::Value* document,
Expand Down
14 changes: 13 additions & 1 deletion be/src/vec/exec/format/json/new_json_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1657,7 +1657,19 @@ Status NewJsonReader::_simdjson_write_columns_by_jsonpath(
return st;
}
}
if (i >= _parsed_jsonpaths.size() || st.is<NOT_FOUND>()) {
if (i < _parsed_jsonpaths.size() && JsonFunctions::is_root_path(_parsed_jsonpaths[i])) {
// Indicate that the jsonpath is "$.", read the full root json object, insert the original doc directly
ColumnNullable* nullable_column = nullptr;
IColumn* target_column_ptr = nullptr;
if (slot_desc->is_nullable()) {
nullable_column = assert_cast<ColumnNullable*>(column_ptr);
target_column_ptr = &nullable_column->get_nested_column();
}
auto* column_string = assert_cast<ColumnString*>(target_column_ptr);
column_string->insert_data(_simdjson_ondemand_padding_buffer.data(),
_original_doc_size);
has_valid_value = true;
} else if (i >= _parsed_jsonpaths.size() || st.is<NOT_FOUND>()) {
// not match in jsondata, filling with default value
RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, valid));
if (!(*valid)) {
Expand Down
6 changes: 6 additions & 0 deletions regression-test/data/load_p0/stream_load/test_json_load.out
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,9 @@ test k2_value

-- !select29 --
10 \N

-- !select30 --
12345 {"k1":12345,"k2":"11111","k3":111111,"k4":[11111]} {"k1":12345,"k2":"11111","k3":111111,"k4":[11111]} 111111
12346 {"k1":12346,"k2":"22222","k4":[22222]} {"k1":12346,"k2":"22222","k4":[22222]} \N
12347 {"k1":12347,"k3":"33333","k4":[22222]} {"k1":12347,"k3":"33333","k4":[22222]} 33333
12348 {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} 33333
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"k1" : 12345, "k2" : "11111", "k3" : 111111, "k4" : [11111]}
{"k1" : 12346, "k2" : "22222", "k4" : [22222]}
{"k1" : 12347, "k3" : "33333", "k4" : [22222]}
{"k1" : 12348, "k3" : "33333", "k5" : {"k51" : 1024, "xxxx" : [11111]}}
27 changes: 27 additions & 0 deletions regression-test/suites/load_p0/stream_load/test_json_load.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -878,4 +878,31 @@ suite("test_json_load", "p0,nonConcurrent") {
} finally {
try_sql("DROP TABLE IF EXISTS ${testTable}")
}

// support read "$." as root
try {
sql "DROP TABLE IF EXISTS ${testTable}"
sql """CREATE TABLE IF NOT EXISTS ${testTable}
(
`k1` varchar(1024) NULL,
`k2` variant NULL,
`k3` variant NULL,
`k4` variant NULL
)
DUPLICATE KEY(`k1`)
COMMENT ''
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);"""

load_json_data.call("${testTable}", "${testTable}_case30", 'false', 'true', 'json', '', '[\"$.k1\",\"$.\", \"$.\", \"$.k3\"]',
'', '', '', 'test_read_root_path.json')

sql "sync"
qt_select30 "select * from ${testTable} order by k1"

} finally {
// try_sql("DROP TABLE IF EXISTS ${testTable}")
}
}

0 comments on commit 224e25e

Please sign in to comment.