Skip to content

Commit 07939ed

Browse files
committed
Stats derivation: support virtual columns
1 parent f3c94ed commit 07939ed

File tree

5 files changed

+90
-23
lines changed

5 files changed

+90
-23
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,26 @@ public Statistics computeOlapScan(OlapScan olapScan) {
638638
checkIfUnknownStatsUsedAsKey(builder);
639639
builder.setRowCount(tableRowCount);
640640
}
641+
return computeVirtualColumnStats(olapScan, builder.build());
642+
}
643+
644+
/**
 * Extends the statistics of an OLAP scan with column statistics for its virtual columns.
 *
 * <p>The input statistics are returned as-is when the relation is neither a
 * {@code LogicalOlapScan} nor a {@code PhysicalOlapScan}, or when the scan has no virtual columns.
 *
 * @param relation the scan whose virtual columns are inspected
 * @param stats statistics already derived for the scan; also the input used to estimate
 *              each virtual column expression
 * @return {@code stats} itself when there is nothing to add, otherwise the statistics built
 *         from {@code stats} plus one entry per virtual column slot
 */
private Statistics computeVirtualColumnStats(OlapScan relation, Statistics stats) {
645+
List<NamedExpression> virtualColumns;
646+
// getVirtualColumns() is reached through a cast to the concrete scan class in each branch.
if (relation instanceof LogicalOlapScan) {
647+
virtualColumns = ((LogicalOlapScan) relation).getVirtualColumns();
648+
} else if (relation instanceof PhysicalOlapScan) {
649+
virtualColumns = ((PhysicalOlapScan) relation).getVirtualColumns();
650+
} else {
651+
return stats;
652+
}
653+
// Nothing to derive: skip creating a builder.
if (virtualColumns.isEmpty()) {
654+
return stats;
655+
}
656+
// NOTE(review): presumably the builder starts from a copy of stats; confirm in StatisticsBuilder.
StatisticsBuilder builder = new StatisticsBuilder(stats);
657+
for (NamedExpression virtualColumn : virtualColumns) {
658+
// Each estimate reads the original stats, not the entries added to the builder in this loop.
ColumnStatistic colStats = ExpressionEstimation.estimate(virtualColumn, stats);
659+
// The estimate is registered under the slot produced by the virtual column's toSlot().
builder.putColumnStatistics(virtualColumn.toSlot(), colStats);
660+
}
641661
return builder.build();
642662
}
643663

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -225,13 +225,20 @@ public String toString() {
225225
rfV2 = runtimeFiltersV2.toString();
226226
}
227227

228-
String operativeCol = "";
229-
if (!operativeSlots.isEmpty()) {
230-
operativeCol = " operativeSlots(" + operativeSlots + ")";
231-
}
232-
return Utils.toSqlString("PhysicalOlapScan[" + table.getName() + index + partitions + operativeCol + "]"
228+
// String operativeCol = "";
229+
// if (!operativeSlots.isEmpty()) {
230+
// operativeCol = " operativeSlots(" + operativeSlots + ")";
231+
// }
232+
// String vir = "";
233+
// if (!virtualColumns.isEmpty()) {
234+
// vir = " vir(" + virtualColumns + ")";
235+
// }
236+
return Utils.toSqlString("PhysicalOlapScan[" + table.getName() + index + partitions + "]"
233237
+ getGroupIdWithPrefix(),
234-
"stats", statistics, "JRFs", jrfBuilder,
238+
"stats", statistics,
239+
"operativeSlots", operativeSlots,
240+
"virtualColumns", virtualColumns,
241+
"JRFs", jrfBuilder,
235242
"RFV2", rfV2);
236243
}
237244

regression-test/suites/nereids_p0/stats/partitionRowCount.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ suite("partitionRowCount") {
3535
sql """physical plan
3636
select * from partitionRowCountTable where a < 250;
3737
"""
38-
contains("PhysicalOlapScan[partitionRowCountTable partitions(2/3) operativeSlots([a#0])]@0 ( stats=4 )")
38+
// Expected plan text: the scan's bracket closes right after partitions(..), and the
// attributes that follow are rendered as ", "-separated key=value pairs.
contains("PhysicalOlapScan[partitionRowCountTable partitions(2/3)]@0 ( stats=4, operativeSlots=[a#0]")
3939
}
4040

4141
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
// Regression case for statistics of a scan that carries a virtual column.
// Setup: creates virtual_column_table, inserts three rows, then overrides the column
// statistics of k1 and v1 with "alter table ... set stats" (row_count=20, ndv=25, range 0..25).
suite("virtual_column") {
18+
sql """
19+
drop table if exists virtual_column_table;
20+
create table virtual_column_table (
21+
k1 int null,
22+
v1 int null
23+
) distributed by hash(k1) buckets 1 properties('replication_num' = '1');
24+
25+
insert into virtual_column_table values (1,1),(2,2),(3,3);
26+
alter table virtual_column_table modify column k1 set stats ('row_count'='20', 'ndv'='25', 'min_value'='0', 'max_value'='25');
27+
alter table virtual_column_table modify column v1 set stats ('row_count'='20', 'ndv'='25', 'min_value'='0', 'max_value'='25');
28+
29+
"""
30+
// The sample plan below shows k1 + v1 (used in both predicates) exposed as a virtual column
// of the scan, with stats=20 on the scan and stats=18 on the filter and project above it.
explain {
31+
/*
32+
PhysicalResultSink[103] ( outputExprs=[k1#0] )
33+
+--PhysicalProject[99]@2 ( stats=18, projects=[k1#0] )
34+
+--PhysicalFilter[95]@1 ( stats=18, predicates=AND[((cast(k1 as BIGINT) + cast(v1 as BIGINT))#2 >= 0),(abs((cast(k1 as BIGINT) + cast(v1 as BIGINT))#2) > 5)] )
35+
+--PhysicalOlapScan[virtual_column_table]@0 ( stats=20, operativeSlots=[k1#0, v1#1, (cast(k1 as BIGINT) + cast(v1 as BIGINT))#2], virtualColumns=[(cast(k1#0 as BIGINT) + cast(v1#1 as BIGINT)) AS `(cast(k1 as BIGINT) + cast(v1 as BIGINT))`#2] )
36+
*/
37+
sql "physical plan select k1 from virtual_column_table where k1 + v1 >= 0 and abs(k1+v1) > 5;"
38+
// Only the row-count text is asserted; the rest of the plan text is not checked.
contains "stats=18"
39+
}
40+
}

regression-test/suites/query_p0/operative_slots/operative_slots.groovy

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -47,33 +47,33 @@ suite("operative_slots") {
4747

4848
explain {
4949
sql "physical plan select * from t join[broadcast] vt on t.k = vt.v['k1'];"
50-
contains("operativeSlots([k#0, __DORIS_DELETE_SIGN__#3])")
51-
contains("operativeSlots([v['k1'")
52-
// expect plan
53-
// PhysicalResultSink[311] ( outputExprs=[k#0, v1#1, user_id#4, name#5, v#6] )
54-
// +--PhysicalDistribute[306]@6 ( stats=1, distributionSpec=DistributionSpecGather )
55-
// +--PhysicalProject[301]@6 ( stats=1, projects=[k#0, v1#1, user_id#4, name#5, v#6] )
56-
// +--PhysicalHashJoin[296]@5 ( stats=1, type=INNER_JOIN, hashCondition=[(k#0 = expr_cast(element_at(v, 'k1') as INT)#7)], otherCondition=[], markCondition=[], hint=[broadcast] )
57-
// |--PhysicalProject[280]@2 ( stats=1, projects=[k#0, v1#1] )
58-
// | +--PhysicalFilter[275]@1 ( stats=1, predicates=(__DORIS_DELETE_SIGN__#2 = 0) )
59-
// | +--PhysicalOlapScan[t operativeSlots([k#0, __DORIS_DELETE_SIGN__#2])]@0 ( stats=1 )
60-
// +--PhysicalDistribute[291]@4 ( stats=1, distributionSpec=DistributionSpecReplicated )
61-
// +--PhysicalProject[286]@4 ( stats=1, projects=[user_id#4, name#5, v#6, cast(v['k1']#15 as INT) AS `expr_cast(element_at(v, 'k1') as INT)`#7] )
62-
// +--PhysicalOlapScan[vt operativeSlots([v['k1']#15])]@3 ( stats=1 )
50+
contains("operativeSlots=[k#0, __DORIS_DELETE_SIGN__#3]")
51+
contains(" operativeSlots=[v['k1']")
52+
/*
53+
PhysicalResultSink[250] ( outputExprs=[k#0, v1#1, v2#2, user_id#5, name#6, v#7] )
54+
+--PhysicalProject[246]@6 ( stats=1, projects=[k#0, v1#1, v2#2, user_id#5, name#6, v#7] )
55+
+--PhysicalHashJoin[242]@5 ( stats=1, type=INNER_JOIN, hashCondition=[(k#0 = expr_cast(element_at(v, 'k1') as INT)#8)], otherCondition=[], markCondition=[], hint=[broadcast] )
56+
|--PhysicalProject[229]@2 ( stats=1, projects=[k#0, v1#1, v2#2] )
57+
| +--PhysicalFilter[225]@1 ( stats=1, predicates=(__DORIS_DELETE_SIGN__#3 = 0) )
58+
| +--PhysicalOlapScan[t]@0 ( stats=1, operativeSlots=[k#0, __DORIS_DELETE_SIGN__#3], virtualColumns=[] )
59+
+--PhysicalDistribute[238]@4 ( stats=1, distributionSpec=DistributionSpecReplicated )
60+
+--PhysicalProject[234]@4 ( stats=1, projects=[user_id#5, name#6, v#7, cast(v['k1']#17 as INT) AS `expr_cast(element_at(v, 'k1') as INT)`#8] )
61+
+--PhysicalOlapScan[vt]@3 ( stats=1, operativeSlots=[v['k1']#17], virtualColumns=[] )
62+
*/
6363
}
6464

6565
explain {
6666
sql "physical plan select * from t where v1=0;"
67-
contains("operativeSlots([v1#1, __DORIS_DELETE_SIGN__#3]")
67+
contains("operativeSlots=[v1#1, __DORIS_DELETE_SIGN__#3]")
6868
}
6969

7070
explain {
7171
sql "physical plan select sum(k) from t group by v1;"
72-
contains("operativeSlots([k#0, v1#1, __DORIS_DELETE_SIGN__#3])")
72+
contains("operativeSlots=[k#0, v1#1, __DORIS_DELETE_SIGN__#3]")
7373
}
7474

7575
explain {
7676
sql "physical plan select rank() over (partition by v2 order by v1) from t;"
77-
contains("operativeSlots([v1#1, v2#2, __DORIS_DELETE_SIGN__#3])")
77+
contains("operativeSlots=[v1#1, v2#2, __DORIS_DELETE_SIGN__#3]")
7878
}
7979
}

0 commit comments

Comments
 (0)