Skip to content

Commit 37e7446

Browse files
committed
Fix how max seq len is calculated
Signed-off-by: Vinay Damodaran <[email protected]>
1 parent 396643d commit 37e7446

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

vllm/model_executor/models/qwen2_5_vl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ def compute_attn_mask_seqlen(
656656
cu_seqlens: torch.Tensor,
657657
) -> tuple[Optional[int], Optional[list[int]]]:
658658
max_seqlen, seqlens = None, None
659-
if self.attn_backend == _Backend.FLASH_ATTN:
659+
if self.attn_backend in [_Backend.FLASH_ATTN, _Backend.FLASH_ATTN_VLLM_V1]:
660660
max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max().item()
661661
elif self.attn_backend == _Backend.XFORMERS:
662662
seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()

vllm/model_executor/models/qwen2_vl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ def compute_attn_mask_seqlen(
636636
self, cu_seqlens: torch.Tensor
637637
) -> tuple[Optional[int], Optional[list[int]]]:
638638
max_seqlen, seqlens = None, None
639-
if self.attn_backend == _Backend.FLASH_ATTN:
639+
if self.attn_backend == [_Backend.FLASH_ATTN, _Backend.FLASH_ATTN_VLLM_V1]:
640640
max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max().item()
641641
elif self.attn_backend == _Backend.XFORMERS:
642642
seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()

0 commit comments

Comments
 (0)