@@ -184,6 +184,30 @@ def parallelism_type(self) -> StageParallelismType:
         # return StageParallelismType.CFG_PARALLEL if get_global_server_args().enable_cfg_parallel else StageParallelismType.REPLICATED
         return StageParallelismType.REPLICATED
 
+    def _handle_boundary_ratio(
+        self,
+        server_args,
+        batch,
+    ):
+        """
+        (Wan2.2) Calculate timestep to switch from high noise expert to low noise expert
+        """
+        boundary_ratio = server_args.pipeline_config.dit_config.boundary_ratio
+        if batch.boundary_ratio is not None:
+            logger.info(
+                "Overriding boundary ratio from %s to %s",
+                boundary_ratio,
+                batch.boundary_ratio,
+            )
+            boundary_ratio = batch.boundary_ratio
+
+        if boundary_ratio is not None:
+            boundary_timestep = boundary_ratio * self.scheduler.num_train_timesteps
+        else:
+            boundary_timestep = None
+
+        return boundary_timestep
+
     def _prepare_denoising_loop(self, batch: Req, server_args: ServerArgs):
         """
         Prepare all necessary invariant variables for the denoising loop.
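
Roughly, the new helper maps a ratio into the scheduler's training horizon, and the resulting timestep picks the active expert at each denoising step. A minimal sketch of that arithmetic, assuming the usual high-t-to-low-t loop; the numbers and pick_expert below are illustrative, not part of this commit:

def pick_expert(t, boundary_timestep, high_noise_expert, low_noise_expert):
    # Early, high-noise (large-t) steps use the high noise expert;
    # later refinement steps fall through to the low noise expert.
    if boundary_timestep is not None and t >= boundary_timestep:
        return high_noise_expert
    return low_noise_expert

num_train_timesteps = 1000  # assumed scheduler training horizon
boundary_ratio = 0.9        # illustrative config/request value
boundary_timestep = boundary_ratio * num_train_timesteps  # 900.0

for t in (999, 900, 899, 0):
    print(t, pick_expert(t, boundary_timestep, "high", "low"))
# 999 high, 900 high, 899 low, 0 low
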
@@ -250,19 +274,7 @@ def _prepare_denoising_loop(self, batch: Req, server_args: ServerArgs):
             # Removed Tensor truthiness assert to avoid GPU sync
 
         # (Wan2.2) Calculate timestep to switch from high noise expert to low noise expert
-        boundary_ratio = server_args.pipeline_config.dit_config.boundary_ratio
-        if batch.boundary_ratio is not None:
-            logger.info(
-                "Overriding boundary ratio from %s to %s",
-                boundary_ratio,
-                batch.boundary_ratio,
-            )
-            boundary_ratio = batch.boundary_ratio
-
-        if boundary_ratio is not None:
-            boundary_timestep = boundary_ratio * self.scheduler.num_train_timesteps
-        else:
-            boundary_timestep = None
+        boundary_timestep = self._handle_boundary_ratio(server_args, batch)
 
         # TI2V specific preparations - BEFORE SP sharding
         z, z_sp, reserved_frames_masks, reserved_frames_mask_sp, seq_len = (
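
The next hunk re-indents the reserved_frames_mask sharding as it moves inline. A self-contained sketch of the time-dimension split that rearrange performs there, with made-up shapes and SP group values (einops and torch are the real dependencies):

import torch
from einops import rearrange

sp_world_size, rank_in_sp_group = 4, 1       # hypothetical SP group of 4, rank 1
mask = torch.arange(8).reshape(1, 8, 1, 1)   # toy mask, [C=1, T=8, H=1, W=1]

# T must divide evenly across ranks, mirroring the time_dim % sp_world_size check
assert mask.shape[1] % sp_world_size == 0
shards = rearrange(mask, "c (n t) h w -> c n t h w", n=sp_world_size)
local = shards[:, rank_in_sp_group].contiguous()  # this rank's T/4 frames
print(local.flatten().tolist())               # [2, 3]: rank 1 holds frames 2-3
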
@@ -363,143 +375,39 @@ def _prepare_denoising_loop(self, batch: Req, server_args: ServerArgs):
                 # Should not happen for TI2V
                 z_sp = z
 
-            # Shard reserved_frames_mask along time dimension to match sharded latents
-            # reserved_frames_mask is a list from masks_like, extract reserved_frames_mask[0] first
-            # reserved_frames_mask[0] shape: [C, T, H, W]
-            # All ranks need their portion of reserved_frames_mask for timestep calculation
-            if reserved_frames_masks is not None:
-                reserved_frames_mask = reserved_frames_masks[
-                    0
-                ]  # Extract tensor from list
-                time_dim = reserved_frames_mask.shape[1]  # [C, T, H, W]
-                if time_dim > 0 and time_dim % sp_world_size == 0:
-                    reserved_frames_mask_sp_tensor = rearrange(
-                        reserved_frames_mask,
-                        "c (n t) h w -> c n t h w",
-                        n=sp_world_size,
-                    ).contiguous()
-                    reserved_frames_mask_sp_tensor = reserved_frames_mask_sp_tensor[
-                        :, rank_in_sp_group, :, :, :
-                    ]
-                    reserved_frames_mask_sp = (
-                        reserved_frames_mask_sp_tensor  # Store as tensor, not list
-                    )
+                # Shard reserved_frames_mask along time dimension to match sharded latents
+                # reserved_frames_mask is a list from masks_like, extract reserved_frames_mask[0] first
+                # reserved_frames_mask[0] shape: [C, T, H, W]
+                # All ranks need their portion of reserved_frames_mask for timestep calculation
+                if reserved_frames_masks is not None:
+                    reserved_frames_mask = reserved_frames_masks[
+                        0
+                    ]  # Extract tensor from list
+                    time_dim = reserved_frames_mask.shape[1]  # [C, T, H, W]
+                    if time_dim > 0 and time_dim % sp_world_size == 0:
+                        reserved_frames_mask_sp_tensor = rearrange(
+                            reserved_frames_mask,
+                            "c (n t) h w -> c n t h w",
+                            n=sp_world_size,
+                        ).contiguous()
+                        reserved_frames_mask_sp_tensor = reserved_frames_mask_sp_tensor[
+                            :, rank_in_sp_group, :, :, :
+                        ]
+                        reserved_frames_mask_sp = (
+                            reserved_frames_mask_sp_tensor  # Store as tensor, not list
+                        )
+                    else:
+                        reserved_frames_mask_sp = reserved_frames_mask
                 else:
-                    reserved_frames_mask_sp = reserved_frames_mask
+                    reserved_frames_mask_sp = None
             else:
-                reserved_frames_mask_sp = None
-        else:
-            # SP not enabled or latents not sharded
-            z_sp = z
-            reserved_frames_mask_sp = (
-                reserved_frames_masks[0] if reserved_frames_masks is not None else None
-            )  # Extract tensor
-
-        return reserved_frames_mask_sp, z_sp
-
-    def _handle_boundary_ratio(
-        self,
-        server_args,
-        batch,
-    ):
-        """
-        (Wan2.2) Calculate timestep to switch from high noise expert to low noise expert
-        """
-        boundary_ratio = server_args.pipeline_config.dit_config.boundary_ratio
-        if batch.boundary_ratio is not None:
-            logger.info(
-                "Overriding boundary ratio from %s to %s",
-                boundary_ratio,
-                batch.boundary_ratio,
-            )
-            boundary_ratio = batch.boundary_ratio
-
-        if boundary_ratio is not None:
-            boundary_timestep = boundary_ratio * self.scheduler.num_train_timesteps
-        else:
-            boundary_timestep = None
-
-        return boundary_timestep
-
-    def _prepare_denoising_loop(self, batch: Req, server_args: ServerArgs):
-        """
-        Prepare all necessary invariant variables for the denoising loop.
-
-        Args:
-            batch: The current batch information.
-            server_args: The inference arguments.
-
-        Returns:
-            A dictionary containing all the prepared variables for the denoising loop.
-        """
-        pipeline = self.pipeline() if self.pipeline else None
-        if not server_args.model_loaded["transformer"]:
-            loader = TransformerLoader()
-            self.transformer = loader.load(
-                server_args.model_paths["transformer"], server_args
-            )
-            if self.server_args.enable_torch_compile:
-                self.transformer = torch.compile(
-                    self.transformer, mode="max-autotune", fullgraph=True
-                )
-            if pipeline:
-                pipeline.add_module("transformer", self.transformer)
-            server_args.model_loaded["transformer"] = True
-
-        # Prepare extra step kwargs for scheduler
-        extra_step_kwargs = self.prepare_extra_func_kwargs(
-            self.scheduler.step,
-            {"generator": batch.generator, "eta": batch.eta},
-        )
-
-        # Setup precision and autocast settings
-        target_dtype = torch.bfloat16
-        autocast_enabled = (
-            target_dtype != torch.float32
-        ) and not server_args.disable_autocast
-
-        # Get timesteps and calculate warmup steps
-        timesteps = batch.timesteps
-        if timesteps is None:
-            raise ValueError("Timesteps must be provided")
-        num_inference_steps = batch.num_inference_steps
-        num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
-
-        # Prepare image latents and embeddings for I2V generation
-        image_embeds = batch.image_embeds
-        if len(image_embeds) > 0:
-            image_embeds = [
-                image_embed.to(target_dtype) for image_embed in image_embeds
-            ]
-
-        # Prepare STA parameters
-        if st_attn_available and self.attn_backend == SlidingTileAttentionBackend:
-            self.prepare_sta_param(batch, server_args)
-
-        # Get latents and embeddings
-        latents = batch.latents
-        prompt_embeds = batch.prompt_embeds
-        # Removed Tensor truthiness assert to avoid GPU sync
-        neg_prompt_embeds = None
-        if batch.do_classifier_free_guidance:
-            neg_prompt_embeds = batch.negative_prompt_embeds
-            assert neg_prompt_embeds is not None
-            # Removed Tensor truthiness assert to avoid GPU sync
-
-        boundary_timestep = self._handle_boundary_ratio(server_args, batch)
-
-        # specifically for Wan2_2_TI2V_5B_Config, not applicable for FastWan2_2_TI2V_5B_Config
-        should_preprocess_for_wan_ti2v = (
-            server_args.pipeline_config.task_type == ModelTaskType.TI2V
-            and batch.condition_image is not None
-            and type(server_args.pipeline_config) is Wan2_2_TI2V_5B_Config
-        )
-
-        # TI2V specific preparations - before SP sharding
-        if should_preprocess_for_wan_ti2v:
-            seq_len, z, reserved_frames_masks = self._preprocess_latents_for_ti2v(
-                latents, target_dtype, batch, server_args
-            )
+            else:
+                # SP not enabled or latents not sharded
+                z_sp = z
+                reserved_frames_mask_sp = (
+                    reserved_frames_masks[0]
+                    if reserved_frames_masks is not None
+                    else None
+                )  # Extract tensor
         else:
             # TI2V not enabled or SP not enabled
             z_sp = z
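
One note on the prepare_extra_func_kwargs call in the removed duplicate above (the surviving copy of _prepare_denoising_loop keeps it): the pattern, as commonly seen in diffusers-style pipelines, forwards eta/generator to scheduler.step only when the signature accepts them. The helper below is a guess at the idea, not the repo's implementation:

import inspect

def prepare_extra_func_kwargs(func, kwargs):
    # Keep only kwargs that are not None and that func actually accepts.
    accepted = set(inspect.signature(func).parameters)
    return {k: v for k, v in kwargs.items() if v is not None and k in accepted}

def step(model_output, timestep, sample, generator=None):  # toy scheduler.step
    return sample

print(prepare_extra_func_kwargs(step, {"generator": None, "eta": 0.0}))
# {} -- eta is not in the signature and generator is None, so neither is passed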