19
19
20
20
use std:: collections:: HashMap ;
21
21
use std:: fmt:: Write ;
22
- use std:: sync:: Arc ;
22
+ use std:: sync:: atomic:: AtomicI32 ;
23
+ use std:: sync:: { Arc , OnceLock } ;
23
24
24
25
use crate :: datasource:: file_format:: arrow:: ArrowFormat ;
25
26
use crate :: datasource:: file_format:: avro:: AvroFormat ;
@@ -89,8 +90,8 @@ use datafusion_expr::expr::{
89
90
use datafusion_expr:: expr_rewriter:: unnormalize_cols;
90
91
use datafusion_expr:: logical_plan:: builder:: wrap_projection_for_join_if_necessary;
91
92
use datafusion_expr:: {
92
- DescribeTable , DmlStatement , ScalarFunctionDefinition , StringifiedPlan , WindowFrame ,
93
- WindowFrameBound , WriteOp , NamedRelation , RecursiveQuery ,
93
+ DescribeTable , DmlStatement , NamedRelation , RecursiveQuery , ScalarFunctionDefinition ,
94
+ StringifiedPlan , WindowFrame , WindowFrameBound , WriteOp ,
94
95
} ;
95
96
use datafusion_physical_expr:: expressions:: Literal ;
96
97
use datafusion_physical_plan:: placeholder_row:: PlaceholderRowExec ;
@@ -452,11 +453,13 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
452
453
logical_plan : & LogicalPlan ,
453
454
session_state : & SessionState ,
454
455
) -> Result < Arc < dyn ExecutionPlan > > {
456
+ reset_recursive_cte_physical_plan_branch_number ( ) ;
457
+
455
458
match self . handle_explain ( logical_plan, session_state) . await ? {
456
459
Some ( plan) => Ok ( plan) ,
457
460
None => {
458
461
let plan = self
459
- . create_initial_plan ( logical_plan, session_state)
462
+ . create_initial_plan ( logical_plan, session_state, None )
460
463
. await ?;
461
464
self . optimize_internal ( plan, session_state, |_, _| { } )
462
465
}
@@ -487,6 +490,23 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
487
490
}
488
491
}
489
492
493
/// Global atomic incrementer that numbers recursive CTE physical plan branches.
///
/// The inner atomic is created lazily, starting at zero, the first time either
/// helper below touches it.
///
/// NOTE(review): this counter is a single process-wide static, so planning
/// calls that run concurrently would share (and reset) the same sequence —
/// confirm that is acceptable for the callers.
static RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH: OnceLock<AtomicI32> = OnceLock::new();

/// Returns the shared branch counter, initialising it to zero on first access.
fn recursive_cte_branch_counter() -> &'static AtomicI32 {
    RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH.get_or_init(|| AtomicI32::new(0))
}

/// Hands out the next branch number.
///
/// Atomically adds one to the shared counter and returns the value it held
/// *before* the addition, reinterpreted as `u32`. The first call after a
/// reset (or after process start) therefore yields `0`.
fn new_recursive_cte_physical_plan_branch_number() -> u32 {
    let ordering = std::sync::atomic::Ordering::SeqCst;
    let previous = recursive_cte_branch_counter().fetch_add(1, ordering);
    previous as u32
}

/// Rewinds the shared branch counter to zero.
///
/// Safe to call before the counter has ever been used: it is created (at
/// zero) on demand and then overwritten with zero.
fn reset_recursive_cte_physical_plan_branch_number() {
    let ordering = std::sync::atomic::Ordering::SeqCst;
    recursive_cte_branch_counter().store(0, ordering);
}
509
+
490
510
impl DefaultPhysicalPlanner {
491
511
/// Create a physical planner that uses `extension_planners` to
492
512
/// plan user-defined logical nodes [`LogicalPlan::Extension`].
@@ -507,6 +527,7 @@ impl DefaultPhysicalPlanner {
507
527
& ' a self ,
508
528
logical_plans : impl IntoIterator < Item = & ' a LogicalPlan > + Send + ' a ,
509
529
session_state : & ' a SessionState ,
530
+ ctx : Option < & ' a String > ,
510
531
) -> BoxFuture < ' a , Result < Vec < Arc < dyn ExecutionPlan > > > > {
511
532
async move {
512
533
// First build futures with as few references as possible, then perform some stream magic.
@@ -519,7 +540,7 @@ impl DefaultPhysicalPlanner {
519
540
. into_iter ( )
520
541
. enumerate ( )
521
542
. map ( |( idx, lp) | async move {
522
- let plan = self . create_initial_plan ( lp, session_state) . await ?;
543
+ let plan = self . create_initial_plan ( lp, session_state, ctx ) . await ?;
523
544
Ok ( ( idx, plan) ) as Result < _ >
524
545
} )
525
546
. collect :: < Vec < _ > > ( ) ;
@@ -548,6 +569,7 @@ impl DefaultPhysicalPlanner {
548
569
& ' a self ,
549
570
logical_plan : & ' a LogicalPlan ,
550
571
session_state : & ' a SessionState ,
572
+ ctx : Option < & ' a String > ,
551
573
) -> BoxFuture < ' a , Result < Arc < dyn ExecutionPlan > > > {
552
574
async move {
553
575
let exec_plan: Result < Arc < dyn ExecutionPlan > > = match logical_plan {
@@ -572,7 +594,7 @@ impl DefaultPhysicalPlanner {
572
594
single_file_output,
573
595
copy_options,
574
596
} ) => {
575
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
597
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
576
598
let parsed_url = ListingTableUrl :: parse ( output_url) ?;
577
599
let object_store_url = parsed_url. object_store ( ) ;
578
600
@@ -620,7 +642,7 @@ impl DefaultPhysicalPlanner {
620
642
let name = table_name. table ( ) ;
621
643
let schema = session_state. schema_for_ref ( table_name) ?;
622
644
if let Some ( provider) = schema. table ( name) . await {
623
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
645
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
624
646
provider. insert_into ( session_state, input_exec, false ) . await
625
647
} else {
626
648
return exec_err ! (
@@ -637,7 +659,7 @@ impl DefaultPhysicalPlanner {
637
659
let name = table_name. table ( ) ;
638
660
let schema = session_state. schema_for_ref ( table_name) ?;
639
661
if let Some ( provider) = schema. table ( name) . await {
640
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
662
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
641
663
provider. insert_into ( session_state, input_exec, true ) . await
642
664
} else {
643
665
return exec_err ! (
@@ -678,7 +700,7 @@ impl DefaultPhysicalPlanner {
678
700
) ;
679
701
}
680
702
681
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
703
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
682
704
683
705
// at this moment we are guaranteed by the logical planner
684
706
// to have all the window_expr to have equal sort key
@@ -774,7 +796,7 @@ impl DefaultPhysicalPlanner {
774
796
..
775
797
} ) => {
776
798
// Initially need to perform the aggregate and then merge the partitions
777
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
799
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
778
800
let physical_input_schema = input_exec. schema ( ) ;
779
801
let logical_input_schema = input. as_ref ( ) . schema ( ) ;
780
802
@@ -848,7 +870,7 @@ impl DefaultPhysicalPlanner {
848
870
) ?) )
849
871
}
850
872
LogicalPlan :: Projection ( Projection { input, expr, .. } ) => {
851
- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
873
+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
852
874
let input_schema = input. as_ref ( ) . schema ( ) ;
853
875
854
876
let physical_exprs = expr
@@ -900,7 +922,7 @@ impl DefaultPhysicalPlanner {
900
922
) ?) )
901
923
}
902
924
LogicalPlan :: Filter ( filter) => {
903
- let physical_input = self . create_initial_plan ( & filter. input , session_state) . await ?;
925
+ let physical_input = self . create_initial_plan ( & filter. input , session_state, ctx ) . await ?;
904
926
let input_schema = physical_input. as_ref ( ) . schema ( ) ;
905
927
let input_dfschema = filter. input . schema ( ) ;
906
928
@@ -914,16 +936,16 @@ impl DefaultPhysicalPlanner {
914
936
let filter = FilterExec :: try_new ( runtime_expr, physical_input) ?;
915
937
Ok ( Arc :: new ( filter. with_default_selectivity ( selectivity) ?) )
916
938
}
917
- LogicalPlan :: Union ( Union { inputs, .. } ) => {
918
- let physical_plans = self . create_initial_plan_multi ( inputs. iter ( ) . map ( |lp| lp. as_ref ( ) ) , session_state) . await ?;
939
+ LogicalPlan :: Union ( Union { inputs, schema } ) => {
940
+ let physical_plans = self . create_initial_plan_multi ( inputs. iter ( ) . map ( |lp| lp. as_ref ( ) ) , session_state, ctx ) . await ?;
919
941
920
942
Ok ( Arc :: new ( UnionExec :: new ( physical_plans) ) )
921
943
}
922
944
LogicalPlan :: Repartition ( Repartition {
923
945
input,
924
946
partitioning_scheme,
925
947
} ) => {
926
- let physical_input = self . create_initial_plan ( input, session_state) . await ?;
948
+ let physical_input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
927
949
let input_schema = physical_input. schema ( ) ;
928
950
let input_dfschema = input. as_ref ( ) . schema ( ) ;
929
951
let physical_partitioning = match partitioning_scheme {
@@ -954,7 +976,7 @@ impl DefaultPhysicalPlanner {
954
976
) ?) )
955
977
}
956
978
LogicalPlan :: Sort ( Sort { expr, input, fetch, .. } ) => {
957
- let physical_input = self . create_initial_plan ( input, session_state) . await ?;
979
+ let physical_input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
958
980
let input_schema = physical_input. as_ref ( ) . schema ( ) ;
959
981
let input_dfschema = input. as_ref ( ) . schema ( ) ;
960
982
let sort_expr = expr
@@ -1045,12 +1067,12 @@ impl DefaultPhysicalPlanner {
1045
1067
} ;
1046
1068
1047
1069
return self
1048
- . create_initial_plan ( & join_plan, session_state)
1070
+ . create_initial_plan ( & join_plan, session_state, ctx )
1049
1071
. await ;
1050
1072
}
1051
1073
1052
1074
// All equi-join keys are columns now, create physical join plan
1053
- let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state) . await ?;
1075
+ let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state, ctx ) . await ?;
1054
1076
let [ physical_left, physical_right] : [ Arc < dyn ExecutionPlan > ; 2 ] = left_right. try_into ( ) . map_err ( |_| DataFusionError :: Internal ( "`create_initial_plan_multi` is broken" . to_string ( ) ) ) ?;
1055
1077
let left_df_schema = left. schema ( ) ;
1056
1078
let right_df_schema = right. schema ( ) ;
@@ -1185,7 +1207,7 @@ impl DefaultPhysicalPlanner {
1185
1207
}
1186
1208
}
1187
1209
LogicalPlan :: CrossJoin ( CrossJoin { left, right, .. } ) => {
1188
- let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state) . await ?;
1210
+ let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state, ctx ) . await ?;
1189
1211
let [ left, right] : [ Arc < dyn ExecutionPlan > ; 2 ] = left_right. try_into ( ) . map_err ( |_| DataFusionError :: Internal ( "`create_initial_plan_multi` is broken" . to_string ( ) ) ) ?;
1190
1212
Ok ( Arc :: new ( CrossJoinExec :: new ( left, right) ) )
1191
1213
}
@@ -1203,10 +1225,10 @@ impl DefaultPhysicalPlanner {
1203
1225
SchemaRef :: new ( schema. as_ref ( ) . to_owned ( ) . into ( ) ) ,
1204
1226
) ) ) ,
1205
1227
LogicalPlan :: SubqueryAlias ( SubqueryAlias { input, .. } ) => {
1206
- self . create_initial_plan ( input, session_state) . await
1228
+ self . create_initial_plan ( input, session_state, ctx ) . await
1207
1229
}
1208
1230
LogicalPlan :: Limit ( Limit { input, skip, fetch, .. } ) => {
1209
- let input = self . create_initial_plan ( input, session_state) . await ?;
1231
+ let input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
1210
1232
1211
1233
// GlobalLimitExec requires a single partition for input
1212
1234
let input = if input. output_partitioning ( ) . partition_count ( ) == 1 {
@@ -1224,7 +1246,7 @@ impl DefaultPhysicalPlanner {
1224
1246
Ok ( Arc :: new ( GlobalLimitExec :: new ( input, * skip, * fetch) ) )
1225
1247
}
1226
1248
LogicalPlan :: Unnest ( Unnest { input, column, schema, options } ) => {
1227
- let input = self . create_initial_plan ( input, session_state) . await ?;
1249
+ let input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
1228
1250
let column_exec = schema. index_of_column ( column)
1229
1251
. map ( |idx| Column :: new ( & column. name , idx) ) ?;
1230
1252
let schema = SchemaRef :: new ( schema. as_ref ( ) . to_owned ( ) . into ( ) ) ;
@@ -1277,7 +1299,7 @@ impl DefaultPhysicalPlanner {
1277
1299
"Unsupported logical plan: Analyze must be root of the plan"
1278
1300
) ,
1279
1301
LogicalPlan :: Extension ( e) => {
1280
- let physical_inputs = self . create_initial_plan_multi ( e. node . inputs ( ) , session_state) . await ?;
1302
+ let physical_inputs = self . create_initial_plan_multi ( e. node . inputs ( ) , session_state, ctx ) . await ?;
1281
1303
1282
1304
let mut maybe_plan = None ;
1283
1305
for planner in & self . extension_planners {
@@ -1313,13 +1335,19 @@ impl DefaultPhysicalPlanner {
1313
1335
Ok ( plan)
1314
1336
}
1315
1337
}
1338
+ // LogicalPlan::SubqueryAlias(SubqueryAlias())
1316
1339
LogicalPlan :: RecursiveQuery ( RecursiveQuery { name, static_term, recursive_term, is_distinct } ) => {
1317
- let static_term = self . create_initial_plan ( static_term, session_state) . await ?;
1318
- let recursive_term = self . create_initial_plan ( recursive_term, session_state) . await ?;
1340
+ let name = format ! ( "{}-{}" , name, new_recursive_cte_physical_plan_branch_number( ) ) ;
1341
+
1342
+ let ctx = Some ( & name) ;
1343
+
1344
+ let static_term = self . create_initial_plan ( static_term, session_state, ctx) . await ?;
1345
+ let recursive_term = self . create_initial_plan ( recursive_term, session_state, ctx) . await ?;
1319
1346
1320
1347
Ok ( Arc :: new ( RecursiveQueryExec :: new ( name. clone ( ) , static_term, recursive_term, * is_distinct) ) )
1321
1348
}
1322
- LogicalPlan :: NamedRelation ( NamedRelation { name, schema} ) => {
1349
+ LogicalPlan :: NamedRelation ( NamedRelation { schema, ..} ) => {
1350
+ let name = ctx. expect ( "NamedRelation must have a context that contains the recursive query's branch name" ) ;
1323
1351
// Named relations are how we represent access to any sort of dynamic data provider. They
1324
1352
// differ from tables in the sense that they can start existing dynamically during the
1325
1353
// execution of a query and then disappear before it even finishes.
@@ -1866,6 +1894,8 @@ impl DefaultPhysicalPlanner {
1866
1894
logical_plan : & LogicalPlan ,
1867
1895
session_state : & SessionState ,
1868
1896
) -> Result < Option < Arc < dyn ExecutionPlan > > > {
1897
+ reset_recursive_cte_physical_plan_branch_number ( ) ;
1898
+
1869
1899
if let LogicalPlan :: Explain ( e) = logical_plan {
1870
1900
use PlanType :: * ;
1871
1901
let mut stringified_plans = vec ! [ ] ;
@@ -1881,7 +1911,7 @@ impl DefaultPhysicalPlanner {
1881
1911
1882
1912
if !config. logical_plan_only && e. logical_optimization_succeeded {
1883
1913
match self
1884
- . create_initial_plan ( e. plan . as_ref ( ) , session_state)
1914
+ . create_initial_plan ( e. plan . as_ref ( ) , session_state, None )
1885
1915
. await
1886
1916
{
1887
1917
Ok ( input) => {
0 commit comments