Skip to content

Commit e5611b4

Browse files
committed
fix issue where CTE could not be referenced more than 1 time
1 parent 38e95dd commit e5611b4

File tree

3 files changed

+147
-35
lines changed

3 files changed

+147
-35
lines changed

datafusion/core/src/physical_planner.rs

+57-27
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
2020
use std::collections::HashMap;
2121
use std::fmt::Write;
22-
use std::sync::Arc;
22+
use std::sync::atomic::AtomicI32;
23+
use std::sync::{Arc, OnceLock};
2324

2425
use crate::datasource::file_format::arrow::ArrowFormat;
2526
use crate::datasource::file_format::avro::AvroFormat;
@@ -89,8 +90,8 @@ use datafusion_expr::expr::{
8990
use datafusion_expr::expr_rewriter::unnormalize_cols;
9091
use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary;
9192
use datafusion_expr::{
92-
DescribeTable, DmlStatement, ScalarFunctionDefinition, StringifiedPlan, WindowFrame,
93-
WindowFrameBound, WriteOp, NamedRelation, RecursiveQuery,
93+
DescribeTable, DmlStatement, NamedRelation, RecursiveQuery, ScalarFunctionDefinition,
94+
StringifiedPlan, WindowFrame, WindowFrameBound, WriteOp,
9495
};
9596
use datafusion_physical_expr::expressions::Literal;
9697
use datafusion_physical_plan::placeholder_row::PlaceholderRowExec;
@@ -452,11 +453,13 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
452453
logical_plan: &LogicalPlan,
453454
session_state: &SessionState,
454455
) -> Result<Arc<dyn ExecutionPlan>> {
456+
reset_recursive_cte_physical_plan_branch_number();
457+
455458
match self.handle_explain(logical_plan, session_state).await? {
456459
Some(plan) => Ok(plan),
457460
None => {
458461
let plan = self
459-
.create_initial_plan(logical_plan, session_state)
462+
.create_initial_plan(logical_plan, session_state, None)
460463
.await?;
461464
self.optimize_internal(plan, session_state, |_, _| {})
462465
}
@@ -487,6 +490,23 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
487490
}
488491
}
489492

493+
/// Process-wide atomic incrementer that hands out a distinct branch number
/// to every recursive CTE encountered while building a physical plan.
///
/// The planner appends the number to the CTE name (`"{name}-{n}"`) so that
/// several references to the same CTE get separate names.
///
/// `AtomicU32::new` is a `const fn`, so the counter is initialised statically
/// and needs no lazy wrapper.
///
/// NOTE(review): the counter is shared by every planner in the process, and
/// it is reset at the start of each planning call. Two plans built
/// concurrently would therefore interleave their numbers — confirm whether
/// that is acceptable for callers.
static RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH: std::sync::atomic::AtomicU32 =
    std::sync::atomic::AtomicU32::new(0);

/// Returns the current branch number and advances the counter by one.
///
/// The first call after a reset returns `0`. On overflow the counter wraps
/// around (the behaviour of `fetch_add`), it does not panic.
fn new_recursive_cte_physical_plan_branch_number() -> u32 {
    RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
}

/// Resets the branch counter to `0` so the next planning run starts
/// numbering its recursive CTE branches from the beginning.
fn reset_recursive_cte_physical_plan_branch_number() {
    RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH.store(0, std::sync::atomic::Ordering::SeqCst);
}
509+
490510
impl DefaultPhysicalPlanner {
491511
/// Create a physical planner that uses `extension_planners` to
492512
/// plan user-defined logical nodes [`LogicalPlan::Extension`].
@@ -507,6 +527,7 @@ impl DefaultPhysicalPlanner {
507527
&'a self,
508528
logical_plans: impl IntoIterator<Item = &'a LogicalPlan> + Send + 'a,
509529
session_state: &'a SessionState,
530+
ctx: Option<&'a String>,
510531
) -> BoxFuture<'a, Result<Vec<Arc<dyn ExecutionPlan>>>> {
511532
async move {
512533
// First build the futures with as few references as possible, then perform some stream magic.
@@ -519,7 +540,7 @@ impl DefaultPhysicalPlanner {
519540
.into_iter()
520541
.enumerate()
521542
.map(|(idx, lp)| async move {
522-
let plan = self.create_initial_plan(lp, session_state).await?;
543+
let plan = self.create_initial_plan(lp, session_state, ctx).await?;
523544
Ok((idx, plan)) as Result<_>
524545
})
525546
.collect::<Vec<_>>();
@@ -548,6 +569,7 @@ impl DefaultPhysicalPlanner {
548569
&'a self,
549570
logical_plan: &'a LogicalPlan,
550571
session_state: &'a SessionState,
572+
ctx: Option<&'a String>,
551573
) -> BoxFuture<'a, Result<Arc<dyn ExecutionPlan>>> {
552574
async move {
553575
let exec_plan: Result<Arc<dyn ExecutionPlan>> = match logical_plan {
@@ -572,7 +594,7 @@ impl DefaultPhysicalPlanner {
572594
single_file_output,
573595
copy_options,
574596
}) => {
575-
let input_exec = self.create_initial_plan(input, session_state).await?;
597+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
576598
let parsed_url = ListingTableUrl::parse(output_url)?;
577599
let object_store_url = parsed_url.object_store();
578600

@@ -620,7 +642,7 @@ impl DefaultPhysicalPlanner {
620642
let name = table_name.table();
621643
let schema = session_state.schema_for_ref(table_name)?;
622644
if let Some(provider) = schema.table(name).await {
623-
let input_exec = self.create_initial_plan(input, session_state).await?;
645+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
624646
provider.insert_into(session_state, input_exec, false).await
625647
} else {
626648
return exec_err!(
@@ -637,7 +659,7 @@ impl DefaultPhysicalPlanner {
637659
let name = table_name.table();
638660
let schema = session_state.schema_for_ref(table_name)?;
639661
if let Some(provider) = schema.table(name).await {
640-
let input_exec = self.create_initial_plan(input, session_state).await?;
662+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
641663
provider.insert_into(session_state, input_exec, true).await
642664
} else {
643665
return exec_err!(
@@ -678,7 +700,7 @@ impl DefaultPhysicalPlanner {
678700
);
679701
}
680702

681-
let input_exec = self.create_initial_plan(input, session_state).await?;
703+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
682704

683705
// at this moment we are guaranteed by the logical planner
684706
// to have all the window_expr to have equal sort key
@@ -774,7 +796,7 @@ impl DefaultPhysicalPlanner {
774796
..
775797
}) => {
776798
// Initially need to perform the aggregate and then merge the partitions
777-
let input_exec = self.create_initial_plan(input, session_state).await?;
799+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
778800
let physical_input_schema = input_exec.schema();
779801
let logical_input_schema = input.as_ref().schema();
780802

@@ -848,7 +870,7 @@ impl DefaultPhysicalPlanner {
848870
)?))
849871
}
850872
LogicalPlan::Projection(Projection { input, expr, .. }) => {
851-
let input_exec = self.create_initial_plan(input, session_state).await?;
873+
let input_exec = self.create_initial_plan(input, session_state, ctx).await?;
852874
let input_schema = input.as_ref().schema();
853875

854876
let physical_exprs = expr
@@ -900,7 +922,7 @@ impl DefaultPhysicalPlanner {
900922
)?))
901923
}
902924
LogicalPlan::Filter(filter) => {
903-
let physical_input = self.create_initial_plan(&filter.input, session_state).await?;
925+
let physical_input = self.create_initial_plan(&filter.input, session_state, ctx).await?;
904926
let input_schema = physical_input.as_ref().schema();
905927
let input_dfschema = filter.input.schema();
906928

@@ -914,16 +936,16 @@ impl DefaultPhysicalPlanner {
914936
let filter = FilterExec::try_new(runtime_expr, physical_input)?;
915937
Ok(Arc::new(filter.with_default_selectivity(selectivity)?))
916938
}
917-
LogicalPlan::Union(Union { inputs, .. }) => {
918-
let physical_plans = self.create_initial_plan_multi(inputs.iter().map(|lp| lp.as_ref()), session_state).await?;
939+
LogicalPlan::Union(Union { inputs, schema }) => {
940+
let physical_plans = self.create_initial_plan_multi(inputs.iter().map(|lp| lp.as_ref()), session_state, ctx).await?;
919941

920942
Ok(Arc::new(UnionExec::new(physical_plans)))
921943
}
922944
LogicalPlan::Repartition(Repartition {
923945
input,
924946
partitioning_scheme,
925947
}) => {
926-
let physical_input = self.create_initial_plan(input, session_state).await?;
948+
let physical_input = self.create_initial_plan(input, session_state, ctx).await?;
927949
let input_schema = physical_input.schema();
928950
let input_dfschema = input.as_ref().schema();
929951
let physical_partitioning = match partitioning_scheme {
@@ -954,7 +976,7 @@ impl DefaultPhysicalPlanner {
954976
)?))
955977
}
956978
LogicalPlan::Sort(Sort { expr, input, fetch, .. }) => {
957-
let physical_input = self.create_initial_plan(input, session_state).await?;
979+
let physical_input = self.create_initial_plan(input, session_state, ctx).await?;
958980
let input_schema = physical_input.as_ref().schema();
959981
let input_dfschema = input.as_ref().schema();
960982
let sort_expr = expr
@@ -1045,12 +1067,12 @@ impl DefaultPhysicalPlanner {
10451067
};
10461068

10471069
return self
1048-
.create_initial_plan(&join_plan, session_state)
1070+
.create_initial_plan(&join_plan, session_state, ctx)
10491071
.await;
10501072
}
10511073

10521074
// All equi-join keys are columns now, create physical join plan
1053-
let left_right = self.create_initial_plan_multi([left.as_ref(), right.as_ref()], session_state).await?;
1075+
let left_right = self.create_initial_plan_multi([left.as_ref(), right.as_ref()], session_state, ctx).await?;
10541076
let [physical_left, physical_right]: [Arc<dyn ExecutionPlan>; 2] = left_right.try_into().map_err(|_| DataFusionError::Internal("`create_initial_plan_multi` is broken".to_string()))?;
10551077
let left_df_schema = left.schema();
10561078
let right_df_schema = right.schema();
@@ -1185,7 +1207,7 @@ impl DefaultPhysicalPlanner {
11851207
}
11861208
}
11871209
LogicalPlan::CrossJoin(CrossJoin { left, right, .. }) => {
1188-
let left_right = self.create_initial_plan_multi([left.as_ref(), right.as_ref()], session_state).await?;
1210+
let left_right = self.create_initial_plan_multi([left.as_ref(), right.as_ref()], session_state, ctx).await?;
11891211
let [left, right]: [Arc<dyn ExecutionPlan>; 2] = left_right.try_into().map_err(|_| DataFusionError::Internal("`create_initial_plan_multi` is broken".to_string()))?;
11901212
Ok(Arc::new(CrossJoinExec::new(left, right)))
11911213
}
@@ -1203,10 +1225,10 @@ impl DefaultPhysicalPlanner {
12031225
SchemaRef::new(schema.as_ref().to_owned().into()),
12041226
))),
12051227
LogicalPlan::SubqueryAlias(SubqueryAlias { input, .. }) => {
1206-
self.create_initial_plan(input, session_state).await
1228+
self.create_initial_plan(input, session_state, ctx).await
12071229
}
12081230
LogicalPlan::Limit(Limit { input, skip, fetch, .. }) => {
1209-
let input = self.create_initial_plan(input, session_state).await?;
1231+
let input = self.create_initial_plan(input, session_state, ctx).await?;
12101232

12111233
// GlobalLimitExec requires a single partition for input
12121234
let input = if input.output_partitioning().partition_count() == 1 {
@@ -1224,7 +1246,7 @@ impl DefaultPhysicalPlanner {
12241246
Ok(Arc::new(GlobalLimitExec::new(input, *skip, *fetch)))
12251247
}
12261248
LogicalPlan::Unnest(Unnest { input, column, schema, options }) => {
1227-
let input = self.create_initial_plan(input, session_state).await?;
1249+
let input = self.create_initial_plan(input, session_state, ctx).await?;
12281250
let column_exec = schema.index_of_column(column)
12291251
.map(|idx| Column::new(&column.name, idx))?;
12301252
let schema = SchemaRef::new(schema.as_ref().to_owned().into());
@@ -1277,7 +1299,7 @@ impl DefaultPhysicalPlanner {
12771299
"Unsupported logical plan: Analyze must be root of the plan"
12781300
),
12791301
LogicalPlan::Extension(e) => {
1280-
let physical_inputs = self.create_initial_plan_multi(e.node.inputs(), session_state).await?;
1302+
let physical_inputs = self.create_initial_plan_multi(e.node.inputs(), session_state, ctx).await?;
12811303

12821304
let mut maybe_plan = None;
12831305
for planner in &self.extension_planners {
@@ -1313,13 +1335,19 @@ impl DefaultPhysicalPlanner {
13131335
Ok(plan)
13141336
}
13151337
}
1338+
// LogicalPlan::SubqueryAlias(SubqueryAlias())
13161339
LogicalPlan::RecursiveQuery(RecursiveQuery { name, static_term, recursive_term, is_distinct }) => {
1317-
let static_term = self.create_initial_plan(static_term, session_state).await?;
1318-
let recursive_term = self.create_initial_plan(recursive_term, session_state).await?;
1340+
let name = format!("{}-{}", name, new_recursive_cte_physical_plan_branch_number());
1341+
1342+
let ctx = Some(&name);
1343+
1344+
let static_term = self.create_initial_plan(static_term, session_state, ctx).await?;
1345+
let recursive_term = self.create_initial_plan(recursive_term, session_state, ctx).await?;
13191346

13201347
Ok(Arc::new(RecursiveQueryExec::new(name.clone(), static_term, recursive_term, *is_distinct)))
13211348
}
1322-
LogicalPlan::NamedRelation(NamedRelation {name, schema}) => {
1349+
LogicalPlan::NamedRelation(NamedRelation {schema, ..}) => {
1350+
let name = ctx.expect("NamedRelation must have a context that contains the recursive query's branch name");
13231351
// Named relations are how we represent access to any sort of dynamic data provider. They
13241352
// differ from tables in the sense that they can start existing dynamically during the
13251353
// execution of a query and then disappear before it even finishes.
@@ -1866,6 +1894,8 @@ impl DefaultPhysicalPlanner {
18661894
logical_plan: &LogicalPlan,
18671895
session_state: &SessionState,
18681896
) -> Result<Option<Arc<dyn ExecutionPlan>>> {
1897+
reset_recursive_cte_physical_plan_branch_number();
1898+
18691899
if let LogicalPlan::Explain(e) = logical_plan {
18701900
use PlanType::*;
18711901
let mut stringified_plans = vec![];
@@ -1881,7 +1911,7 @@ impl DefaultPhysicalPlanner {
18811911

18821912
if !config.logical_plan_only && e.logical_optimization_succeeded {
18831913
match self
1884-
.create_initial_plan(e.plan.as_ref(), session_state)
1914+
.create_initial_plan(e.plan.as_ref(), session_state, None)
18851915
.await
18861916
{
18871917
Ok(input) => {

datafusion/sql/src/query.rs

+10-8
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ use datafusion_common::{
2323
plan_err, sql_err, Constraints, DFSchema, DataFusionError, Result, ScalarValue,
2424
};
2525
use datafusion_expr::{
26-
CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan, LogicalPlanBuilder,
26+
logical_plan, CreateMemoryTable, DdlStatement, Distinct, Expr, LogicalPlan,
27+
LogicalPlanBuilder,
2728
};
2829
use sqlparser::ast::{
2930
Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, SetExpr, SetOperator,
@@ -133,10 +134,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
133134
static_metadata,
134135
)?;
135136

137+
let name = cte_name.clone();
138+
136139
// Step 2.2: Create a temporary relation logical plan that will be used
137140
// as the input to the recursive term
138141
let named_relation = LogicalPlanBuilder::named_relation(
139-
cte_name.as_str(),
142+
&name,
140143
Arc::new(named_relation_schema),
141144
)
142145
.build()?;
@@ -157,14 +160,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
157160

158161
// ---------- Step 4: Create the final plan ------------------
159162
// Step 4.1: Compile the final plan
160-
let final_plan = LogicalPlanBuilder::from(static_plan)
161-
.to_recursive_query(
162-
cte_name.clone(),
163-
recursive_plan,
164-
distinct,
165-
)?
163+
let logical_plan = LogicalPlanBuilder::from(static_plan)
164+
.to_recursive_query(name, recursive_plan, distinct)?
166165
.build()?;
167166

167+
let final_plan =
168+
self.apply_table_alias(logical_plan, cte.alias)?;
169+
168170
// Step 4.2: Remove the temporary relation from the planning context and replace it
169171
// with the final plan.
170172
planner_context.insert_cte(cte_name.clone(), final_plan);

0 commit comments

Comments
 (0)