@@ -82,23 +82,36 @@ def expand_payload_lifecycle_stages(
8282
8383 reader = _last_manifest_reader (stages [: materialize_idx + 1 ])
8484 payload_specs = _payload_binding_specs (payload_cfg , stages = stages , consumers = consumers , reader = reader )
85+ _configure_planned_source_segment_inputs (reader , payload_cfg , payload_specs , config )
8586 _validate_payload_consumers (consumers , payload_specs )
87+ _validate_single_segment_planner_owner (
88+ reader ,
89+ consumers ,
90+ config = config ,
91+ )
8692
87- materializers = [_build_payload_materializer (reader , spec , payload_cfg , config , run_id = run_id ) for spec in payload_specs ]
93+ materializers = [
94+ _build_payload_materializer (reader , spec , payload_cfg , config , run_id = run_id )
95+ for spec in payload_specs
96+ ]
8897 primary_spec = payload_specs [0 ]
8998 release = payload_release_stage_cls (
9099 name = str (payload_cfg .get ("release_stage_name" , "payload_release" )),
91100 payload_ref_key = primary_spec .ref_key ,
92101 waveform_key = primary_spec .waveform_key ,
93102 )
94103
104+ assembler = _post_release_payload_lifecycle_stage (config , reader , consumers , primary_spec , run_id = run_id )
105+
95106 expanded : list [ProcessingStage ] = []
96107 for idx , stage in enumerate (stages ):
97108 expanded .append (stage )
98109 if idx == materialize_idx :
99110 expanded .extend (materializers )
100111 if idx == release_idx :
101112 expanded .append (release )
113+ if assembler is not None :
114+ expanded .append (assembler )
102115 logger .info ("Expanded logical graph into payload lifecycle execution graph: {}" , " -> " .join (stage .name for stage in expanded ))
103116 return expanded
104117
@@ -249,6 +262,108 @@ def _stage_payload_bindings(stage: ProcessingStage) -> list[dict[str, str]]:
249262 return []
250263
251264
265+ def _configure_planned_source_segment_inputs (
266+ reader : ProcessingStage | None ,
267+ payload_cfg : dict [str , Any ],
268+ payload_specs : list [PayloadBindingSpec ],
269+ config : Any ,
270+ ) -> None :
271+ if reader is None or not bool (getattr (reader , "enable_global_bucketing" , False )):
272+ return
273+ scheduler_cfg = _config_section (config , "global_audio_scheduler" )
274+ configured = scheduler_cfg .get ("segment_input_keys" , payload_cfg .get ("segment_input_keys" ))
275+ segment_input_keys : list [str ] = []
276+ if configured is not None :
277+ segment_input_keys .extend (_normalise_string_list (configured , key = "global_audio_scheduler.segment_input_keys" ))
278+ segment_input_keys .extend (spec .source_key for spec in payload_specs )
279+ setattr (reader , "segment_input_keys" , _dedupe_strings (segment_input_keys ))
280+ setattr (reader , "run_id" , _pipeline_run_id (config ))
281+ if "parent_store_actor_name_prefix" in scheduler_cfg :
282+ setattr (reader , "parent_store_actor_name_prefix" , str (scheduler_cfg ["parent_store_actor_name_prefix" ]))
283+
284+
285+ def _validate_single_segment_planner_owner (
286+ reader : ProcessingStage | None ,
287+ consumers : list [ProcessingStage ],
288+ * ,
289+ config : Any ,
290+ ) -> None :
291+ if reader is None or not bool (getattr (reader , "enable_global_bucketing" , False )):
292+ return
293+ owner_stage = _single_selector (getattr (reader , "owner_stage" , None ), key = "global_audio_scheduler.owner_stage" )
294+ matching_consumers = [stage for stage in consumers if owner_stage in _stage_match_idents (stage )]
295+ if not matching_consumers :
296+ available = sorted ({ident for stage in consumers for ident in _stage_match_idents (stage )})
297+ msg = (
298+ "global_audio_scheduler.owner_stage must select exactly one stage listed in "
299+ "payload_lifecycle.consumers. Global bucketing has a single planning owner; "
300+ f"{ owner_stage !r} was not found in payload consumers { available } ."
301+ )
302+ raise ValueError (msg )
303+ if len (matching_consumers ) > 1 :
304+ names = [stage .name for stage in matching_consumers ]
305+ msg = f"global_audio_scheduler.owner_stage must select exactly one payload consumer; matched { names } "
306+ raise ValueError (msg )
307+ _validate_planner_owner_has_largest_model_window (reader = reader , owner = matching_consumers [0 ], consumers = consumers )
308+ setattr (reader , "owner_stage" , owner_stage )
309+
310+
def _validate_planner_owner_has_largest_model_window(
    *,
    reader: ProcessingStage,
    owner: ProcessingStage,
    consumers: list[ProcessingStage],
) -> None:
    """Ensure the planning owner has the widest ``max_inference_duration_s``.

    Two conditions are enforced, in this order:

    1. No consumer has a ``max_inference_duration_s`` greater than the owner's.
    2. The reader's ``max_inference_duration_s`` equals the owner's, within an
       absolute tolerance of ``1e-6`` seconds.

    Args:
        reader: Reader stage whose window must match the owner's.
        owner: Consumer selected as the planning owner.
        consumers: All payload consumers; each must define the attribute.

    Raises:
        ValueError: If a consumer has a larger window than the owner, or the
            reader's window differs from the owner's. Errors raised by
            ``_required_positive_seconds`` propagate unchanged.
    """
    duration_attr = "max_inference_duration_s"
    owner_max_s = _required_positive_seconds(owner, duration_attr)

    # Every consumer is validated, even ones that turn out not to be larger,
    # so a missing or invalid window surfaces before any comparison error.
    larger_consumers: list[tuple[str, float]] = []
    for stage in consumers:
        stage_name = stage.name
        stage_max_s = _required_positive_seconds(stage, duration_attr)
        if stage_max_s > owner_max_s:
            larger_consumers.append((stage_name, stage_max_s))

    if larger_consumers:
        details = ", ".join(f"{name}={value:g}s" for name, value in larger_consumers)
        msg = (
            "global_audio_scheduler.owner_stage must select the payload consumer with the largest "
            "max_inference_duration_s because the source planner emits one segment plan. "
            f"Selected owner {owner.name!r} has max_inference_duration_s={owner_max_s:g}s, "
            f"but larger consumer(s) exist: {details}."
        )
        raise ValueError(msg)

    tolerance_s = 1e-6
    reader_max_s = _required_positive_seconds(reader, duration_attr)
    if abs(reader_max_s - owner_max_s) > tolerance_s:
        msg = (
            "ManifestReader(enable_global_bucketing=True).max_inference_duration_s must match the "
            "selected owner stage's max_inference_duration_s. "
            f"Reader has {reader_max_s:g}s, owner {owner.name!r} has {owner_max_s:g}s."
        )
        raise ValueError(msg)
338+
339+
340+ def _required_positive_seconds (stage : ProcessingStage , attr : str ) -> float :
341+ value = getattr (stage , attr , None )
342+ if value is None :
343+ msg = f"Global bucketing requires stage { stage .name !r} to define positive { attr } "
344+ raise ValueError (msg )
345+ return _positive_seconds (value , label = f"{ stage .name } .{ attr } " )
346+
347+
348+ def _optional_positive_seconds (stage : ProcessingStage , attr : str ) -> float | None :
349+ value = getattr (stage , attr , None )
350+ if value is None :
351+ return None
352+ return _positive_seconds (value , label = f"{ stage .name } .{ attr } " )
353+
354+
355+ def _positive_seconds (value : Any , * , label : str ) -> float :
356+ try :
357+ seconds = float (value )
358+ except (TypeError , ValueError ) as exc :
359+ msg = f"{ label } must be a positive number of seconds, got { value !r} "
360+ raise TypeError (msg ) from exc
361+ if seconds <= 0 :
362+ msg = f"{ label } must be > 0 seconds, got { seconds :g} "
363+ raise ValueError (msg )
364+ return seconds
365+
366+
252367def _build_payload_materializer (
253368 reader : ProcessingStage | None ,
254369 spec : PayloadBindingSpec ,
@@ -274,6 +389,33 @@ def _build_payload_materializer(
274389 )
275390
276391
392+ def _post_release_payload_lifecycle_stage (
393+ config : Any ,
394+ reader : ProcessingStage | None ,
395+ consumers : list [ProcessingStage ],
396+ primary_spec : PayloadBindingSpec ,
397+ * ,
398+ run_id : str ,
399+ ) -> ProcessingStage | None :
400+ if reader is None or not bool (getattr (reader , "enable_global_bucketing" , False )):
401+ return None
402+ builder = getattr (reader , "build_payload_lifecycle_post_release_stage" , None )
403+ if not callable (builder ):
404+ msg = (
405+ "Global bucketing is enabled, but the source/reader stage does not provide "
406+ "build_payload_lifecycle_post_release_stage(). The central payload lifecycle "
407+ "planner only owns generic insertion order; modality-specific assembly must be "
408+ f"provided by the planner stage, got { type (reader ).__name__ } ."
409+ )
410+ raise ValueError (msg )
411+ return builder (
412+ pipeline_config = config ,
413+ consumers = consumers ,
414+ primary_payload_spec = primary_spec ,
415+ run_id = run_id ,
416+ )
417+
418+
277419def _pipeline_run_id (config : Any ) -> str :
278420 value = _config_get (config , "_curator_pipeline_run_id" )
279421 text = str (value or "" ).strip ()
@@ -352,6 +494,20 @@ def _normalise_string_list(value: Any, *, key: str) -> list[str]:
352494 return result
353495
354496
497+ def _dedupe_strings (values : list [str ]) -> list [str ]:
498+ result : list [str ] = []
499+ seen : set [str ] = set ()
500+ for value in values :
501+ text = str (value ).strip ()
502+ if text and text not in seen :
503+ seen .add (text )
504+ result .append (text )
505+ if not result :
506+ msg = "At least one non-empty string is required"
507+ raise ValueError (msg )
508+ return result
509+
510+
355511def _single_selector (value : Any , * , key : str ) -> str :
356512 values = _normalise_string_list (value , key = key )
357513 if len (values ) != 1 :
0 commit comments