Skip to content

Commit 59a3e4b

Browse files
authored
chore(query): add glob function (#17666)
* chore(query): add glob
* add fuzz join tests
* add fuzz join tests
1 parent fe274f4 commit 59a3e4b

File tree

13 files changed

+521
-14
lines changed

13 files changed

+521
-14
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ geo = { version = "0.28.0", features = ["use-serde"] }
316316
geohash = "0.13.0"
317317
geozero = { version = "0.14.0", features = ["with-geo", "with-geojson", "with-wkb", "with-wkt"] }
318318
gimli = "0.31.0"
319+
glob = "0.3.0"
319320
globiter = "0.1"
320321
goldenfile = "1.4"
321322
h3o = "0.4.0"

src/query/functions/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ ethnum = { workspace = true }
3434
geo = { workspace = true }
3535
geohash = { workspace = true }
3636
geozero = { workspace = true }
37+
glob = { workspace = true }
3738
h3o = { workspace = true }
3839
hex = { workspace = true }
3940
itertools = { workspace = true }

src/query/functions/src/scalars/comparison.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use databend_common_expression::types::ValueType;
3939
use databend_common_expression::types::VariantType;
4040
use databend_common_expression::types::ALL_NUMBER_CLASSES;
4141
use databend_common_expression::values::Value;
42+
use databend_common_expression::vectorize_with_builder_2_arg;
4243
use databend_common_expression::with_number_mapped_type;
4344
use databend_common_expression::Column;
4445
use databend_common_expression::EvalContext;
@@ -611,6 +612,24 @@ fn register_like(registry: &mut FunctionRegistry) {
611612
}
612613
}),
613614
);
615+
616+
registry.register_passthrough_nullable_2_arg::<StringType, StringType, BooleanType, _, _>(
617+
"glob",
618+
|_, _, _| FunctionDomain::Full,
619+
vectorize_with_builder_2_arg::<StringType, StringType, BooleanType>(
620+
|a, b, builder, _ctx| {
621+
// Create a glob pattern from the second argument
622+
let pattern = match glob::Pattern::new(b) {
623+
Ok(pattern) => pattern,
624+
Err(_) => {
625+
builder.push(false);
626+
return;
627+
}
628+
};
629+
builder.push(pattern.matches(a));
630+
},
631+
),
632+
);
614633
}
615634

616635
fn vectorize_like(

src/query/functions/src/scalars/other.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -502,8 +502,6 @@ pub(crate) mod jaro_winkler {
502502
//! # Examples
503503
//!
504504
//! ```
505-
//! use jaro_winkler::jaro_winkler;
506-
//!
507505
//! assert_eq!(jaro_winkler("martha", "marhta"), 0.9611111111111111);
508506
//! assert_eq!(jaro_winkler("", "words"), 0.0);
509507
//! assert_eq!(jaro_winkler("same", "same"), 1.0);

src/query/functions/tests/it/scalars/regexp.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ fn test_string() {
3030
test_regexp_like(regexp_file);
3131
test_regexp_replace(regexp_file);
3232
test_regexp_substr(regexp_file);
33+
test_glob(regexp_file);
3334
}
3435

3536
fn test_regexp_instr(file: &mut impl Write) {
@@ -257,6 +258,13 @@ fn test_regexp_instr(file: &mut impl Write) {
257258
);
258259
}
259260

261+
fn test_glob(file: &mut impl Write) {
262+
run_ast(file, "glob('Michael!', '*')", &[]);
263+
run_ast(file, "glob('a', 'a')", &[]);
264+
run_ast(file, "glob('abc', 'a*')", &[]);
265+
run_ast(file, "glob('abc', '*bc')", &[]);
266+
}
267+
260268
fn test_regexp_like(file: &mut impl Write) {
261269
run_ast(file, "regexp_like('Michael!', '.*')", &[]);
262270
run_ast(file, "regexp_like('a', '^[a-d]')", &[]);

src/query/functions/tests/it/scalars/testdata/function_list.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,6 +1809,8 @@ Functions overloads:
18091809
1 get_string(Variant NULL, String NULL) :: String NULL
18101810
2 get_string(Variant, Int64) :: String NULL
18111811
3 get_string(Variant NULL, Int64 NULL) :: String NULL
1812+
0 glob(String, String) :: Boolean
1813+
1 glob(String NULL, String NULL) :: Boolean NULL
18121814
0 great_circle_angle(Float64, Float64, Float64, Float64) :: Float32
18131815
1 great_circle_angle(Float64 NULL, Float64 NULL, Float64 NULL, Float64 NULL) :: Float32 NULL
18141816
0 great_circle_distance(Float64, Float64, Float64, Float64) :: Float32

src/query/functions/tests/it/scalars/testdata/regexp.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -938,3 +938,39 @@ error:
938938

939939

940940

941+
ast : glob('Michael!', '*')
942+
raw expr : glob('Michael!', '*')
943+
checked expr : glob<String, String>("Michael!", "*")
944+
optimized expr : true
945+
output type : Boolean
946+
output domain : {TRUE}
947+
output : true
948+
949+
950+
ast : glob('a', 'a')
951+
raw expr : glob('a', 'a')
952+
checked expr : glob<String, String>("a", "a")
953+
optimized expr : true
954+
output type : Boolean
955+
output domain : {TRUE}
956+
output : true
957+
958+
959+
ast : glob('abc', 'a*')
960+
raw expr : glob('abc', 'a*')
961+
checked expr : glob<String, String>("abc", "a*")
962+
optimized expr : true
963+
output type : Boolean
964+
output domain : {TRUE}
965+
output : true
966+
967+
968+
ast : glob('abc', '*bc')
969+
raw expr : glob('abc', '*bc')
970+
checked expr : glob<String, String>("abc", "*bc")
971+
optimized expr : true
972+
output type : Boolean
973+
output domain : {TRUE}
974+
output : true
975+
976+

tests/sqllogictests/suites/query/functions/02_0047_function_string_regexp_like.test

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,11 +351,57 @@ select regexp_like('µå周çб', '周')
351351
1
352352

353353

354-
355354
query B
356355
select regexp_like('周周周周', '.*')
357356
----
358357
1
359358

359+
query B
360+
SELECT GLOB('abc', 'a?c')
361+
----
362+
1
363+
364+
query B
365+
SELECT GLOB('abc', 'a??')
366+
----
367+
1
360368

369+
query B
370+
SELECT GLOB('abc', 'a*')
371+
----
372+
1
373+
374+
query B
375+
SELECT GLOB('abc', 'a*c')
376+
----
377+
1
378+
379+
query B
380+
SELECT GLOB('abc', 'a*d')
381+
----
382+
0
361383

384+
query B
385+
SELECT GLOB('abc', '?bc')
386+
----
387+
1
388+
389+
query B
390+
SELECT GLOB('abc', '*bc')
391+
----
392+
1
393+
394+
query B
395+
SELECT GLOB('abc', '*bd')
396+
----
397+
0
398+
399+
query B
400+
SELECT GLOB('abc', 'abc')
401+
----
402+
1
403+
404+
query B
405+
SELECT GLOB('abc', 'abcd')
406+
----
407+
0
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
1
2+
1
3+
1
4+
1
5+
true
6+
1
7+
1
8+
1
9+
1
10+
true
11+
1
12+
1
13+
1
14+
1
15+
true
16+
1
17+
1
18+
1
19+
1
20+
true
21+
1
22+
1
23+
1
24+
1
25+
true
26+
1
27+
1
28+
1
29+
1
30+
true
31+
1
32+
1
33+
1
34+
1
35+
true
36+
1
37+
1
38+
1
39+
1
40+
true
41+
1
42+
1
43+
1
44+
1
45+
true
46+
1
47+
1
48+
1
49+
1
50+
true
51+
1
52+
1
53+
1
54+
1
55+
true
56+
1
57+
1
58+
1
59+
1
60+
true
61+
1
62+
1
63+
1
64+
1
65+
true
66+
1
67+
1
68+
1
69+
1
70+
true
71+
1
72+
1
73+
1
74+
1
75+
true
76+
1
77+
1
78+
1
79+
1
80+
true
81+
1
82+
1
83+
1
84+
1
85+
true
86+
1
87+
1
88+
1
89+
1
90+
true
91+
1
92+
1
93+
1
94+
1
95+
true
96+
1
97+
1
98+
1
99+
1
100+
true
101+
1
102+
1
103+
1
104+
1
105+
true

0 commit comments

Comments
 (0)