Skip to content

Commit a22f24a

Browse files
authored
[feat](function) Add new function strip_null_value (#57386)
### What problem does this PR solve? Doc: apache/doris-website#3040 Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test <!-- At least one of them must be included. --> - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason <!-- Add your reason? --> - Behavior changed: - [ ] No. - [ ] Yes. <!-- Explain the behavior change --> - Does this need documentation? - [ ] No. - [ ] Yes. <!-- Add document PR link here. eg: apache/doris-website#1214 --> ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->
1 parent 75c8291 commit a22f24a

File tree

6 files changed

+257
-0
lines changed

6 files changed

+257
-0
lines changed

be/src/vec/functions/function_jsonb.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3028,6 +3028,71 @@ class FunctionJsonbRemove : public IFunction {
30283028
}
30293029
};
30303030

3031+
class FunctionStripNullValue : public IFunction {
3032+
public:
3033+
static constexpr auto name = "strip_null_value";
3034+
static FunctionPtr create() { return std::make_shared<FunctionStripNullValue>(); }
3035+
3036+
String get_name() const override { return name; }
3037+
bool is_variadic() const override { return false; }
3038+
size_t get_number_of_arguments() const override { return 1; }
3039+
3040+
bool use_default_implementation_for_nulls() const override { return false; }
3041+
3042+
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3043+
return make_nullable(std::make_shared<DataTypeJsonb>());
3044+
}
3045+
3046+
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3047+
uint32_t result, size_t input_rows_count) const override {
3048+
const auto& arg_column = block.get_by_position(arguments[0]).column;
3049+
const ColumnString* json_column = nullptr;
3050+
const NullMap* json_null_map = nullptr;
3051+
if (arg_column->is_nullable()) {
3052+
const auto& nullable_col = assert_cast<const ColumnNullable&>(*arg_column);
3053+
json_column = assert_cast<const ColumnString*>(&nullable_col.get_nested_column());
3054+
json_null_map = &nullable_col.get_null_map_data();
3055+
} else {
3056+
json_column = assert_cast<const ColumnString*>(arg_column.get());
3057+
}
3058+
3059+
auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
3060+
auto result_column = return_data_type->create_column();
3061+
3062+
auto& result_nullmap = assert_cast<ColumnNullable&>(*result_column).get_null_map_data();
3063+
auto& result_data_col = assert_cast<ColumnString&>(
3064+
assert_cast<ColumnNullable&>(*result_column).get_nested_column());
3065+
3066+
result_nullmap.resize_fill(input_rows_count, 0);
3067+
for (size_t i = 0; i != input_rows_count; ++i) {
3068+
if (json_null_map && (*json_null_map)[i]) {
3069+
result_nullmap[i] = 1;
3070+
result_data_col.insert_default();
3071+
continue;
3072+
}
3073+
JsonbDocument* json_doc = nullptr;
3074+
const auto& json_str = json_column->get_data_at(i);
3075+
RETURN_IF_ERROR(
3076+
JsonbDocument::checkAndCreateDocument(json_str.data, json_str.size, &json_doc));
3077+
if (json_doc) [[likely]] {
3078+
if (json_doc->getValue()->isNull()) {
3079+
result_nullmap[i] = 1;
3080+
result_data_col.insert_default();
3081+
} else {
3082+
result_nullmap[i] = 0;
3083+
result_data_col.insert_data(json_str.data, json_str.size);
3084+
}
3085+
} else {
3086+
result_nullmap[i] = 1;
3087+
result_data_col.insert_default();
3088+
}
3089+
}
3090+
3091+
block.get_by_position(result).column = std::move(result_column);
3092+
return Status::OK();
3093+
}
3094+
};
3095+
30313096
void register_function_jsonb(SimpleFunctionFactory& factory) {
30323097
factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name);
30333098
factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias);
@@ -3079,6 +3144,8 @@ void register_function_jsonb(SimpleFunctionFactory& factory) {
30793144

30803145
factory.register_function<FunctionJsonbRemove>();
30813146
factory.register_alias(FunctionJsonbRemove::name, FunctionJsonbRemove::alias);
3147+
3148+
factory.register_function<FunctionStripNullValue>();
30823149
}
30833150

30843151
} // namespace doris::vectorized

fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@
469469
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToDate;
470470
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToMap;
471471
import org.apache.doris.nereids.trees.expressions.functions.scalar.Strcmp;
472+
import org.apache.doris.nereids.trees.expressions.functions.scalar.StripNullValue;
472473
import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement;
473474
import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBinary;
474475
import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap;
@@ -1011,6 +1012,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
10111012
scalar(StY.class, "st_y"),
10121013
scalar(StartsWith.class, "starts_with"),
10131014
scalar(Strcmp.class, "strcmp"),
1015+
scalar(StripNullValue.class, "strip_null_value"),
10141016
scalar(StrToDate.class, "str_to_date"),
10151017
scalar(StrToMap.class, "str_to_map"),
10161018
scalar(SubBinary.class, "sub_binary"),
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.trees.expressions.functions.scalar;
19+
20+
import org.apache.doris.catalog.FunctionSignature;
21+
import org.apache.doris.nereids.trees.expressions.Expression;
22+
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
23+
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
24+
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
25+
import org.apache.doris.nereids.types.JsonType;
26+
27+
import com.google.common.base.Preconditions;
28+
import com.google.common.collect.ImmutableList;
29+
30+
import java.util.List;
31+
32+
/**
33+
* ScalarFunction 'strip_null_value'.
34+
*/
35+
public class StripNullValue extends ScalarFunction implements ExplicitlyCastableSignature, AlwaysNullable {
36+
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
37+
FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE)
38+
);
39+
40+
public StripNullValue(Expression arg) {
41+
super("strip_null_value", arg);
42+
}
43+
44+
/** constructor for withChildren and reuse signature */
45+
private StripNullValue(ScalarFunctionParams functionParams) {
46+
super(functionParams);
47+
}
48+
49+
/**
50+
* withChildren.
51+
*/
52+
@Override
53+
public StripNullValue withChildren(List<Expression> children) {
54+
Preconditions.checkArgument(children.size() == 1);
55+
return new StripNullValue(getFunctionParams(children));
56+
}
57+
58+
@Override
59+
public List<FunctionSignature> getSignatures() {
60+
return SIGNATURES;
61+
}
62+
63+
@Override
64+
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
65+
return visitor.visitStripNullValue(this, context);
66+
}
67+
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@
470470
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToDate;
471471
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToMap;
472472
import org.apache.doris.nereids.trees.expressions.functions.scalar.Strcmp;
473+
import org.apache.doris.nereids.trees.expressions.functions.scalar.StripNullValue;
473474
import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement;
474475
import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBinary;
475476
import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap;
@@ -2437,6 +2438,10 @@ default R visitStrcmp(Strcmp strcmp, C context) {
24372438
return visitScalarFunction(strcmp, context);
24382439
}
24392440

2441+
// Visitor hook for StripNullValue; the default falls back to visitScalarFunction.
default R visitStripNullValue(StripNullValue stripNullValue, C context) {
2442+
return visitScalarFunction(stripNullValue, context);
2443+
}
2444+
24402445
default R visitVersion(Version version, C context) {
24412446
return visitScalarFunction(version, context);
24422447
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !test --
3+
1 "null" 30
4+
2 "Bob" \N
5+
3 \N \N
6+
4 \N \N
7+
8+
-- !test2 --
9+
1 "Alice2" \N
10+
2 "Bob" \N
11+
3 "Jack" 28
12+
4 "Jim" 33
13+
14+
-- !test2 --
15+
1 "a" "a"
16+
2 \N \N
17+
3 \N "c"
18+
4 \N \N
19+
20+
-- !test3 --
21+
1 "aaa" 123 "aaa" 123
22+
2 "bbbb" "a123" "bbbb" "a123"
23+
3 \N \N \N \N
24+
4 \N \N \N 7890
25+
26+
-- !const --
27+
"aaa" \N "ccc" \N \N
28+
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
// Regression suite for strip_null_value: applies it to json_extract results over
// nullable / NOT NULL JSON columns, nullable / NOT NULL path columns, and constants.
suite("test_strip_null_value") {
19+
// Fixture 1: one nullable JSON column and one NOT NULL JSON column.
sql """DROP TABLE IF EXISTS `test_strip_null_value_table`;"""
20+
sql """CREATE TABLE test_strip_null_value_table (
21+
id INT,
22+
json_value JSON,
23+
json_value_non_null JSON not null
24+
) PROPERTIES ("replication_num"="1");"""
25+
26+
// Rows 3 and 4 store SQL NULL in json_value. Row 1's "name" is the JSON string
// "null" (not a JSON null), while several other keys hold real JSON nulls.
sql """INSERT INTO test_strip_null_value_table VALUES
27+
(1, '{"name": "null", "age": 30, "a": "aaa", "b": "b", "c": null}', '{"name": "Alice2", "age": null, "a": 123, "c": null}'),
28+
(2, '{"name": "Bob", "age": null, "b": "bbbb", "c": 23423, "d": null}', '{"name": "Bob", "age": null, "b": "a123", "c": null, "d": 9993}'),
29+
(3, null, '{"name": "Jack", "age": 28, "a": null, "b": null, "c": null}'),
30+
(4, null, '{"name": "Jim", "age": 33, "a": 1234, "b": 4567, "d": 7890}');
31+
"""
32+
33+
// Nullable JSON column with constant paths.
qt_test """
34+
select id,
35+
strip_null_value(json_extract(json_value, '\$.name')) striped,
36+
strip_null_value(json_extract(json_value, '\$.age')) as striped2
37+
from test_strip_null_value_table order by 1;
38+
"""
39+
40+
// NOT NULL JSON column with constant paths.
// NOTE(review): the tag qt_test2 is used again further down in this suite, and the
// expected-output file carries two "-- !test2 --" sections. TODO confirm the
// framework matches duplicate tags by order, or give the second query its own tag.
qt_test2 """
41+
select id,
42+
strip_null_value(json_extract(json_value_non_null, '\$.name')) striped,
43+
strip_null_value(json_extract(json_value_non_null, '\$.age')) striped2
44+
from test_strip_null_value_table order by 1;
45+
"""
46+
47+
// Fixture 2: JSON paths stored in a nullable and a NOT NULL string column.
sql """DROP TABLE IF EXISTS `test_strip_null_value_paths_table`;"""
48+
sql """CREATE TABLE test_strip_null_value_paths_table (
49+
id INT,
50+
path string,
51+
path_not_null string not null
52+
) PROPERTIES ("replication_num"="1");"""
53+
54+
// Rows 3 and 4 store SQL NULL in the nullable path column.
sql """INSERT INTO test_strip_null_value_paths_table VALUES
55+
(1, '\$.a', '\$.a'),
56+
(2, '\$.b', '\$.b'),
57+
(3, null, '\$.c'),
58+
(4, null, '\$.d');
59+
"""
60+
61+
// Constant JSON document with paths read from the table columns.
// NOTE(review): second use of the qt_test2 tag (see the earlier note).
qt_test2 """
62+
select
63+
id,
64+
strip_null_value(json_extract('{"a": "a", "b": null, "c": "c", "d": null}', path)) striped1,
65+
strip_null_value(json_extract('{"a": "a", "b": null, "c": "c", "d": null}', path_not_null)) striped2
66+
from test_strip_null_value_paths_table order by 1;
67+
"""
68+
69+
// Both the JSON document and the path come from columns: every combination of
// nullable / NOT NULL document with nullable / NOT NULL path, joined on id.
qt_test3 """
70+
select
71+
t1.id,
72+
strip_null_value(json_extract(t1.json_value, t2.path)) striped1,
73+
strip_null_value(json_extract(t1.json_value_non_null, t2.path)) striped2,
74+
strip_null_value(json_extract(t1.json_value, t2.path_not_null)) striped3,
75+
strip_null_value(json_extract(t1.json_value_non_null, t2.path_not_null)) striped4
76+
from test_strip_null_value_table t1
77+
inner join test_strip_null_value_paths_table t2 on t1.id = t2.id
78+
order by t1.id;
79+
"""
80+
81+
// All-constant arguments, including a literal NULL argument.
qt_const """
82+
select strip_null_value(json_extract('{"a": "aaa", "b": null, "c": "ccc", "d": null}', '\$.a')) as striped1,
83+
strip_null_value(json_extract('{"a": "aaa", "b": null, "c": "ccc", "d": null}', '\$.b')) as striped2,
84+
strip_null_value(json_extract('{"a": "aaa", "b": null, "c": "ccc", "d": null}', '\$.c')) as striped3,
85+
strip_null_value(json_extract('{"a": "aaa", "b": null, "c": "ccc", "d": null}', '\$.d')) as striped4,
86+
strip_null_value(NULL) as striped5;
87+
"""
88+
}

0 commit comments

Comments
 (0)