sgl-project · ash-sigh · Oct 10, 2025 · Oct 10, 2025 · Oct 13, 2025
@@ -119,7 +119,7 @@ def alloc_extend(
             assert len(torch.unique(out_indices)) == len(out_indices)
 
         self.free_pages = self.free_pages[num_new_pages_item:]
-        return out_indices
+        return out_indices.int()
 
     def alloc_decode(
         self,

@@ -75,7 +75,7 @@ def replay(
             self.positions[: self.raw_num_token].copy_(forward_batch.positions)
 
         # Replay
-        if self.model_runner.model_config.index_head_dim is None:
+        if getattr(self.model_runner.model_config, "index_head_dim", None) is None:
             seq_lens = forward_batch.seq_lens.cpu().tolist() + [0] * (
                 self.bs - self.raw_bs
             )

@@ -237,8 +237,10 @@ def process_mm_data(
             if not _is_npu:
                 kwargs["device"] = "cuda"
             elif processor.__class__.__name__ not in {
+                "Qwen2VLProcessor",
                 "Qwen2_5_VLProcessor",
                 "Qwen3VLProcessor",
+                "Glm4vProcessor",
             }:
                 # Note: for qwen-vl, processor has some reshape issue because of dims restriction on Ascend.
                 kwargs["device"] = "npu"