Skip to content

fix(query): fix variant failed to successfully apply virtual column in bind join #17673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/query/sql/src/planner/binder/bind_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,17 @@ impl VirtualColumnContext {
virtual_columns: Vec::new(),
}
}

/// Folds the virtual-column bookkeeping of `other` into `self`.
///
/// Pushdown remains allowed when either context allows it. Every other
/// field of `other` is cloned and appended to the matching field of `self`
/// through `extend`; `other` itself is left untouched.
///
/// NOTE(review): the field types are not visible from this method. If any
/// of them is a keyed collection, `extend` replaces the value stored under
/// a key that both contexts share instead of combining the two values.
/// Confirm that this is the intended behavior.
pub(crate) fn merge(&mut self, other: &VirtualColumnContext) {
    // Logical OR of the two flags.
    self.allow_pushdown |= other.allow_pushdown;

    // Clone each field of `other` first, then append the copy to `self`.
    let table_indices = other.table_indices.clone();
    self.table_indices.extend(table_indices);

    let column_indices = other.virtual_column_indices.clone();
    self.virtual_column_indices.extend(column_indices);

    let column_names = other.virtual_column_names.clone();
    self.virtual_column_names.extend(column_names);

    let column_ids = other.next_column_ids.clone();
    self.next_column_ids.extend(column_ids);

    let columns = other.virtual_columns.clone();
    self.virtual_columns.extend(columns);
}
}

/// `BindContext` stores all the free variables in a query and tracks the context of binding procedure.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ impl Binder {
let mut right_column_bindings = right_context.columns.clone();

let mut bind_context = bind_context.replace();
bind_context
.virtual_column_context
.merge(&left_context.virtual_column_context);
bind_context
.virtual_column_context
.merge(&right_context.virtual_column_context);

self.check_table_name_and_condition(
&left_column_bindings,
Expand Down Expand Up @@ -762,7 +768,7 @@ impl<'a> JoinConditionResolver<'a> {
}

fn resolve_predicate(
&self,
&mut self,
predicate: &Expr,
left_join_conditions: &mut Vec<ScalarExpr>,
right_join_conditions: &mut Vec<ScalarExpr>,
Expand Down Expand Up @@ -810,6 +816,9 @@ impl<'a> JoinConditionResolver<'a> {
non_equi_conditions.push(predicate);
}
}
self.join_context
.virtual_column_context
.merge(&join_context.virtual_column_context);
Ok(())
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,6 @@ TableScan
├── virtual columns: [v['a'][0], v['b']]
└── estimated rows: 0.00

statement ok
drop table t1

statement ok
drop table if exists t2

Expand Down Expand Up @@ -256,6 +253,218 @@ Filter
├── virtual columns: [v['a'][0], v['b']]
└── estimated rows: 1.00

query T
explain select t1.a, t1.v['b'] from t1 left outer join t2 on t1.v['b'] = t2.a
----
HashJoin
├── output columns: [t1.a (#0), t1.v['b'] (#4)]
├── join type: LEFT OUTER
├── build keys: []
├── probe keys: []
├── keys is null equal: []
├── filters: [TRY_CAST(v['b'] (#4) AS Int32 NULL) = t2.a (#2)]
├── estimated rows: 1.00
├── TableScan(Build)
│ ├── table: default.test_virtual_db.t2
│ ├── output columns: [a (#2)]
│ ├── read rows: 1
│ ├── read size: < 1 KiB
│ ├── partitions total: 1
│ ├── partitions scanned: 1
│ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
│ ├── push downs: [filters: [], limit: NONE]
│ └── estimated rows: 1.00
└── TableScan(Probe)
├── table: default.test_virtual_db.t1
├── output columns: [a (#0), v['b'] (#4)]
├── read rows: 1
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [], limit: NONE]
├── virtual columns: [v['b']]
└── estimated rows: 1.00

query T
explain select t1.a, t1.v['b'] from t1 left outer join t2 on t1.v['b'] = t2.a where t1.v['a'][0] = 1;
----
HashJoin
├── output columns: [t1.a (#0), t1.v['b'] (#4)]
├── join type: RIGHT OUTER
├── build keys: []
├── probe keys: []
├── keys is null equal: []
├── filters: [TRY_CAST(v['b'] (#4) AS Int32 NULL) = t2.a (#2)]
├── estimated rows: 0.00
├── TableScan(Build)
│ ├── table: default.test_virtual_db.t1
│ ├── output columns: [a (#0), v['b'] (#4)]
│ ├── read rows: 1
│ ├── read size: < 1 KiB
│ ├── partitions total: 1
│ ├── partitions scanned: 1
│ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
│ ├── push downs: [filters: [is_true(TRY_CAST(v['a'][0] (#5) AS UInt8 NULL) = 1)], limit: NONE]
│ ├── virtual columns: [v['a'][0], v['b']]
│ └── estimated rows: 0.00
└── TableScan(Probe)
├── table: default.test_virtual_db.t2
├── output columns: [a (#2)]
├── read rows: 1
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [], limit: NONE]
└── estimated rows: 1.00


query T
explain select * from t1 join t2 on t1.v['b'] = t2.a;
----
HashJoin
├── output columns: [t1.a (#0), t1.v (#1), t2.a (#2), t2.v (#3)]
├── join type: INNER
├── build keys: [t2.a (#2)]
├── probe keys: [CAST(v['b'] (#4) AS Int32 NULL)]
├── keys is null equal: [false]
├── filters: []
├── estimated rows: 1.00
├── TableScan(Build)
│ ├── table: default.test_virtual_db.t2
│ ├── output columns: [a (#2), v (#3)]
│ ├── read rows: 1
│ ├── read size: < 1 KiB
│ ├── partitions total: 1
│ ├── partitions scanned: 1
│ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
│ ├── push downs: [filters: [], limit: NONE]
│ └── estimated rows: 1.00
└── TableScan(Probe)
├── table: default.test_virtual_db.t1
├── output columns: [a (#0), v (#1), v['b'] (#4)]
├── read rows: 1
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [], limit: NONE]
├── virtual columns: [v['b']]
└── estimated rows: 1.00

query T
explain select * from t1 join t2 on t1.v['b'] = t2.v['b'];
----
HashJoin
├── output columns: [t1.a (#0), t1.v (#1), t2.a (#2), t2.v (#3)]
├── join type: INNER
├── build keys: [v['b'] (#5)]
├── probe keys: [v['b'] (#4)]
├── keys is null equal: [false]
├── filters: []
├── estimated rows: 1.00
├── TableScan(Build)
│ ├── table: default.test_virtual_db.t2
│ ├── output columns: [a (#2), v (#3), v['b'] (#5)]
│ ├── read rows: 1
│ ├── read size: < 1 KiB
│ ├── partitions total: 1
│ ├── partitions scanned: 1
│ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
│ ├── push downs: [filters: [], limit: NONE]
│ ├── virtual columns: [v['b']]
│ └── estimated rows: 1.00
└── TableScan(Probe)
├── table: default.test_virtual_db.t1
├── output columns: [a (#0), v (#1), v['b'] (#4)]
├── read rows: 1
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [], limit: NONE]
├── virtual columns: [v['b']]
└── estimated rows: 1.00

query T
explain select * from t1 join t2 on t1.v['b'] > t2.a;
----
HashJoin
├── output columns: [t1.a (#0), t1.v (#1), t2.a (#2), t2.v (#3)]
├── join type: INNER
├── build keys: []
├── probe keys: []
├── keys is null equal: []
├── filters: [TRY_CAST(v['b'] (#4) AS Int32 NULL) > t2.a (#2)]
├── estimated rows: 1.00
├── TableScan(Build)
│ ├── table: default.test_virtual_db.t2
│ ├── output columns: [a (#2), v (#3)]
│ ├── read rows: 1
│ ├── read size: < 1 KiB
│ ├── partitions total: 1
│ ├── partitions scanned: 1
│ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
│ ├── push downs: [filters: [], limit: NONE]
│ └── estimated rows: 1.00
└── TableScan(Probe)
├── table: default.test_virtual_db.t1
├── output columns: [a (#0), v (#1), v['b'] (#4)]
├── read rows: 1
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [], limit: NONE]
├── virtual columns: [v['b']]
└── estimated rows: 1.00

query T
explain select t1.v['b'], sum(t2.a) from t1 join t2 on t1.v['b'] = t2.a group by t1.v['b'];
----
AggregateFinal
├── output columns: [sum(t2.a) (#5), t1.v['b'] (#4)]
├── group by: [v['b']]
├── aggregate functions: [sum(a)]
├── estimated rows: 1.00
└── AggregatePartial
├── group by: [v['b']]
├── aggregate functions: [sum(a)]
├── estimated rows: 1.00
└── HashJoin
├── output columns: [t1.v['b'] (#4), t2.a (#2)]
├── join type: INNER
├── build keys: [t2.a (#2)]
├── probe keys: [CAST(v['b'] (#4) AS Int32 NULL)]
├── keys is null equal: [false]
├── filters: []
├── estimated rows: 1.00
├── TableScan(Build)
│ ├── table: default.test_virtual_db.t2
│ ├── output columns: [a (#2)]
│ ├── read rows: 1
│ ├── read size: < 1 KiB
│ ├── partitions total: 1
│ ├── partitions scanned: 1
│ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
│ ├── push downs: [filters: [], limit: NONE]
│ └── estimated rows: 1.00
└── TableScan(Probe)
├── table: default.test_virtual_db.t1
├── output columns: [v['b'] (#4)]
├── read rows: 1
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [], limit: NONE]
├── virtual columns: [v['b']]
└── estimated rows: 1.00

statement ok
drop table t1

statement ok
drop table t2

Expand Down