19
19
20
20
use std:: collections:: HashMap ;
21
21
use std:: fmt:: Write ;
22
- use std:: sync:: Arc ;
22
+ use std:: sync:: atomic:: AtomicI32 ;
23
+ use std:: sync:: { Arc , OnceLock } ;
23
24
24
25
use crate :: datasource:: file_format:: arrow:: ArrowFormat ;
25
26
use crate :: datasource:: file_format:: avro:: AvroFormat ;
@@ -89,8 +90,8 @@ use datafusion_expr::expr::{
89
90
use datafusion_expr:: expr_rewriter:: unnormalize_cols;
90
91
use datafusion_expr:: logical_plan:: builder:: wrap_projection_for_join_if_necessary;
91
92
use datafusion_expr:: {
92
- DescribeTable , DmlStatement , ScalarFunctionDefinition , StringifiedPlan , WindowFrame ,
93
- WindowFrameBound , WriteOp , NamedRelation , RecursiveQuery ,
93
+ DescribeTable , DmlStatement , NamedRelation , RecursiveQuery , ScalarFunctionDefinition ,
94
+ StringifiedPlan , WindowFrame , WindowFrameBound , WriteOp ,
94
95
} ;
95
96
use datafusion_physical_expr:: expressions:: Literal ;
96
97
use datafusion_physical_plan:: placeholder_row:: PlaceholderRowExec ;
@@ -452,11 +453,13 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
452
453
logical_plan : & LogicalPlan ,
453
454
session_state : & SessionState ,
454
455
) -> Result < Arc < dyn ExecutionPlan > > {
456
+ reset_recursive_cte_physical_plan_branch_number ( ) ;
457
+
455
458
match self . handle_explain ( logical_plan, session_state) . await ? {
456
459
Some ( plan) => Ok ( plan) ,
457
460
None => {
458
461
let plan = self
459
- . create_initial_plan ( logical_plan, session_state)
462
+ . create_initial_plan ( logical_plan, session_state, None )
460
463
. await ?;
461
464
self . optimize_internal ( plan, session_state, |_, _| { } )
462
465
}
@@ -487,6 +490,21 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
487
490
}
488
491
}
489
492
493
+ static RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH : OnceLock < AtomicI32 > = OnceLock :: new ( ) ;
494
+
495
+ fn new_recursive_cte_physical_plan_branch_number ( ) -> u32 {
496
+ let counter = RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH
497
+ . get_or_init ( || AtomicI32 :: new ( 0 ) )
498
+ . fetch_add ( 1 , std:: sync:: atomic:: Ordering :: SeqCst ) ;
499
+ counter as u32
500
+ }
501
+
502
+ fn reset_recursive_cte_physical_plan_branch_number ( ) {
503
+ RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH
504
+ . get_or_init ( || AtomicI32 :: new ( 0 ) )
505
+ . store ( 0 , std:: sync:: atomic:: Ordering :: SeqCst ) ;
506
+ }
507
+
490
508
impl DefaultPhysicalPlanner {
491
509
/// Create a physical planner that uses `extension_planners` to
492
510
/// plan user-defined logical nodes [`LogicalPlan::Extension`].
@@ -507,6 +525,7 @@ impl DefaultPhysicalPlanner {
507
525
& ' a self ,
508
526
logical_plans : impl IntoIterator < Item = & ' a LogicalPlan > + Send + ' a ,
509
527
session_state : & ' a SessionState ,
528
+ ctx : Option < & ' a String > ,
510
529
) -> BoxFuture < ' a , Result < Vec < Arc < dyn ExecutionPlan > > > > {
511
530
async move {
512
531
// First build futures with as little references as possible, then performing some stream magic.
@@ -519,7 +538,7 @@ impl DefaultPhysicalPlanner {
519
538
. into_iter ( )
520
539
. enumerate ( )
521
540
. map ( |( idx, lp) | async move {
522
- let plan = self . create_initial_plan ( lp, session_state) . await ?;
541
+ let plan = self . create_initial_plan ( lp, session_state, ctx ) . await ?;
523
542
Ok ( ( idx, plan) ) as Result < _ >
524
543
} )
525
544
. collect :: < Vec < _ > > ( ) ;
@@ -548,6 +567,7 @@ impl DefaultPhysicalPlanner {
548
567
& ' a self ,
549
568
logical_plan : & ' a LogicalPlan ,
550
569
session_state : & ' a SessionState ,
570
+ ctx : Option < & ' a String > ,
551
571
) -> BoxFuture < ' a , Result < Arc < dyn ExecutionPlan > > > {
552
572
async move {
553
573
let exec_plan: Result < Arc < dyn ExecutionPlan > > = match logical_plan {
@@ -572,7 +592,7 @@ impl DefaultPhysicalPlanner {
572
592
single_file_output,
573
593
copy_options,
574
594
} ) => {
575
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
595
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
576
596
let parsed_url = ListingTableUrl :: parse ( output_url) ?;
577
597
let object_store_url = parsed_url. object_store ( ) ;
578
598
@@ -620,7 +640,7 @@ impl DefaultPhysicalPlanner {
620
640
let name = table_name. table ( ) ;
621
641
let schema = session_state. schema_for_ref ( table_name) ?;
622
642
if let Some ( provider) = schema. table ( name) . await {
623
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
643
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
624
644
provider. insert_into ( session_state, input_exec, false ) . await
625
645
} else {
626
646
return exec_err ! (
@@ -637,7 +657,7 @@ impl DefaultPhysicalPlanner {
637
657
let name = table_name. table ( ) ;
638
658
let schema = session_state. schema_for_ref ( table_name) ?;
639
659
if let Some ( provider) = schema. table ( name) . await {
640
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
660
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
641
661
provider. insert_into ( session_state, input_exec, true ) . await
642
662
} else {
643
663
return exec_err ! (
@@ -678,7 +698,7 @@ impl DefaultPhysicalPlanner {
678
698
) ;
679
699
}
680
700
681
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
701
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
682
702
683
703
// at this moment we are guaranteed by the logical planner
684
704
// to have all the window_expr to have equal sort key
@@ -774,7 +794,7 @@ impl DefaultPhysicalPlanner {
774
794
..
775
795
} ) => {
776
796
// Initially need to perform the aggregate and then merge the partitions
777
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
797
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
778
798
let physical_input_schema = input_exec. schema ( ) ;
779
799
let logical_input_schema = input. as_ref ( ) . schema ( ) ;
780
800
@@ -848,7 +868,7 @@ impl DefaultPhysicalPlanner {
848
868
) ?) )
849
869
}
850
870
LogicalPlan :: Projection ( Projection { input, expr, .. } ) => {
851
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
871
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
852
872
let input_schema = input. as_ref ( ) . schema ( ) ;
853
873
854
874
let physical_exprs = expr
@@ -900,7 +920,7 @@ impl DefaultPhysicalPlanner {
900
920
) ?) )
901
921
}
902
922
LogicalPlan :: Filter ( filter) => {
903
- let physical_input = self . create_initial_plan ( & filter. input , session_state) . await ?;
923
+ let physical_input = self . create_initial_plan ( & filter. input , session_state, ctx ) . await ?;
904
924
let input_schema = physical_input. as_ref ( ) . schema ( ) ;
905
925
let input_dfschema = filter. input . schema ( ) ;
906
926
@@ -914,16 +934,16 @@ impl DefaultPhysicalPlanner {
914
934
let filter = FilterExec :: try_new ( runtime_expr, physical_input) ?;
915
935
Ok ( Arc :: new ( filter. with_default_selectivity ( selectivity) ?) )
916
936
}
917
- LogicalPlan :: Union ( Union { inputs, .. } ) => {
918
- let physical_plans = self . create_initial_plan_multi ( inputs. iter ( ) . map ( |lp| lp. as_ref ( ) ) , session_state) . await ?;
937
+ LogicalPlan :: Union ( Union { inputs, schema : _ } ) => {
938
+ let physical_plans = self . create_initial_plan_multi ( inputs. iter ( ) . map ( |lp| lp. as_ref ( ) ) , session_state, ctx ) . await ?;
919
939
920
940
Ok ( Arc :: new ( UnionExec :: new ( physical_plans) ) )
921
941
}
922
942
LogicalPlan :: Repartition ( Repartition {
923
943
input,
924
944
partitioning_scheme,
925
945
} ) => {
926
- let physical_input = self . create_initial_plan ( input, session_state) . await ?;
946
+ let physical_input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
927
947
let input_schema = physical_input. schema ( ) ;
928
948
let input_dfschema = input. as_ref ( ) . schema ( ) ;
929
949
let physical_partitioning = match partitioning_scheme {
@@ -954,7 +974,7 @@ impl DefaultPhysicalPlanner {
954
974
) ?) )
955
975
}
956
976
LogicalPlan :: Sort ( Sort { expr, input, fetch, .. } ) => {
957
- let physical_input = self . create_initial_plan ( input, session_state) . await ?;
977
+ let physical_input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
958
978
let input_schema = physical_input. as_ref ( ) . schema ( ) ;
959
979
let input_dfschema = input. as_ref ( ) . schema ( ) ;
960
980
let sort_expr = expr
@@ -1045,12 +1065,12 @@ impl DefaultPhysicalPlanner {
1045
1065
} ;
1046
1066
1047
1067
return self
1048
- . create_initial_plan ( & join_plan, session_state)
1068
+ . create_initial_plan ( & join_plan, session_state, ctx )
1049
1069
. await ;
1050
1070
}
1051
1071
1052
1072
// All equi-join keys are columns now, create physical join plan
1053
- let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state) . await ?;
1073
+ let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state, ctx ) . await ?;
1054
1074
let [ physical_left, physical_right] : [ Arc < dyn ExecutionPlan > ; 2 ] = left_right. try_into ( ) . map_err ( |_| DataFusionError :: Internal ( "`create_initial_plan_multi` is broken" . to_string ( ) ) ) ?;
1055
1075
let left_df_schema = left. schema ( ) ;
1056
1076
let right_df_schema = right. schema ( ) ;
@@ -1185,7 +1205,7 @@ impl DefaultPhysicalPlanner {
1185
1205
}
1186
1206
}
1187
1207
LogicalPlan :: CrossJoin ( CrossJoin { left, right, .. } ) => {
1188
- let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state) . await ?;
1208
+ let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state, ctx ) . await ?;
1189
1209
let [ left, right] : [ Arc < dyn ExecutionPlan > ; 2 ] = left_right. try_into ( ) . map_err ( |_| DataFusionError :: Internal ( "`create_initial_plan_multi` is broken" . to_string ( ) ) ) ?;
1190
1210
Ok ( Arc :: new ( CrossJoinExec :: new ( left, right) ) )
1191
1211
}
@@ -1203,10 +1223,10 @@ impl DefaultPhysicalPlanner {
1203
1223
SchemaRef :: new ( schema. as_ref ( ) . to_owned ( ) . into ( ) ) ,
1204
1224
) ) ) ,
1205
1225
LogicalPlan :: SubqueryAlias ( SubqueryAlias { input, .. } ) => {
1206
- self . create_initial_plan ( input, session_state) . await
1226
+ self . create_initial_plan ( input, session_state, ctx ) . await
1207
1227
}
1208
1228
LogicalPlan :: Limit ( Limit { input, skip, fetch, .. } ) => {
1209
- let input = self . create_initial_plan ( input, session_state) . await ?;
1229
+ let input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
1210
1230
1211
1231
// GlobalLimitExec requires a single partition for input
1212
1232
let input = if input. output_partitioning ( ) . partition_count ( ) == 1 {
@@ -1224,7 +1244,7 @@ impl DefaultPhysicalPlanner {
1224
1244
Ok ( Arc :: new ( GlobalLimitExec :: new ( input, * skip, * fetch) ) )
1225
1245
}
1226
1246
LogicalPlan :: Unnest ( Unnest { input, column, schema, options } ) => {
1227
- let input = self . create_initial_plan ( input, session_state) . await ?;
1247
+ let input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
1228
1248
let column_exec = schema. index_of_column ( column)
1229
1249
. map ( |idx| Column :: new ( & column. name , idx) ) ?;
1230
1250
let schema = SchemaRef :: new ( schema. as_ref ( ) . to_owned ( ) . into ( ) ) ;
@@ -1277,7 +1297,7 @@ impl DefaultPhysicalPlanner {
1277
1297
"Unsupported logical plan: Analyze must be root of the plan"
1278
1298
) ,
1279
1299
LogicalPlan :: Extension ( e) => {
1280
- let physical_inputs = self . create_initial_plan_multi ( e. node . inputs ( ) , session_state) . await ?;
1300
+ let physical_inputs = self . create_initial_plan_multi ( e. node . inputs ( ) , session_state, ctx ) . await ?;
1281
1301
1282
1302
let mut maybe_plan = None ;
1283
1303
for planner in & self . extension_planners {
@@ -1314,12 +1334,17 @@ impl DefaultPhysicalPlanner {
1314
1334
}
1315
1335
}
1316
1336
LogicalPlan :: RecursiveQuery ( RecursiveQuery { name, static_term, recursive_term, is_distinct } ) => {
1317
- let static_term = self . create_initial_plan ( static_term, session_state) . await ?;
1318
- let recursive_term = self . create_initial_plan ( recursive_term, session_state) . await ?;
1337
+ let name = format ! ( "{}-{}" , name, new_recursive_cte_physical_plan_branch_number( ) ) ;
1338
+
1339
+ let ctx = Some ( & name) ;
1340
+
1341
+ let static_term = self . create_initial_plan ( static_term, session_state, ctx) . await ?;
1342
+ let recursive_term = self . create_initial_plan ( recursive_term, session_state, ctx) . await ?;
1319
1343
1320
1344
Ok ( Arc :: new ( RecursiveQueryExec :: new ( name. clone ( ) , static_term, recursive_term, * is_distinct) ) )
1321
1345
}
1322
- LogicalPlan :: NamedRelation ( NamedRelation { name, schema} ) => {
1346
+ LogicalPlan :: NamedRelation ( NamedRelation { schema, ..} ) => {
1347
+ let name = ctx. expect ( "NamedRelation must have a context that contains the recursive query's branch name" ) ;
1323
1348
// Named relations is how we represent access to any sort of dynamic data provider. They
1324
1349
// differ from tables in the sense that they can start existing dynamically during the
1325
1350
// execution of a query and then disappear before it even finishes.
@@ -1866,6 +1891,8 @@ impl DefaultPhysicalPlanner {
1866
1891
logical_plan : & LogicalPlan ,
1867
1892
session_state : & SessionState ,
1868
1893
) -> Result < Option < Arc < dyn ExecutionPlan > > > {
1894
+ reset_recursive_cte_physical_plan_branch_number ( ) ;
1895
+
1869
1896
if let LogicalPlan :: Explain ( e) = logical_plan {
1870
1897
use PlanType :: * ;
1871
1898
let mut stringified_plans = vec ! [ ] ;
@@ -1881,7 +1908,7 @@ impl DefaultPhysicalPlanner {
1881
1908
1882
1909
if !config. logical_plan_only && e. logical_optimization_succeeded {
1883
1910
match self
1884
- . create_initial_plan ( e. plan . as_ref ( ) , session_state)
1911
+ . create_initial_plan ( e. plan . as_ref ( ) , session_state, None )
1885
1912
. await
1886
1913
{
1887
1914
Ok ( input) => {
0 commit comments