File tree Expand file tree Collapse file tree 2 files changed +2
-2
lines changed
vllm/model_executor/models Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -656,7 +656,7 @@ def compute_attn_mask_seqlen(
656
656
cu_seqlens : torch .Tensor ,
657
657
) -> tuple [Optional [int ], Optional [list [int ]]]:
658
658
max_seqlen , seqlens = None , None
659
- if self .attn_backend == _Backend .FLASH_ATTN :
659
+ if self .attn_backend in [ _Backend .FLASH_ATTN , _Backend . FLASH_ATTN_VLLM_V1 ] :
660
660
max_seqlen = (cu_seqlens [1 :] - cu_seqlens [:- 1 ]).max ().item ()
661
661
elif self .attn_backend == _Backend .XFORMERS :
662
662
seqlens = (cu_seqlens [1 :] - cu_seqlens [:- 1 ]).tolist ()
Original file line number Diff line number Diff line change @@ -636,7 +636,7 @@ def compute_attn_mask_seqlen(
636
636
self , cu_seqlens : torch .Tensor
637
637
) -> tuple [Optional [int ], Optional [list [int ]]]:
638
638
max_seqlen , seqlens = None , None
639
- if self .attn_backend == _Backend .FLASH_ATTN :
639
+ if self .attn_backend in [ _Backend .FLASH_ATTN , _Backend . FLASH_ATTN_VLLM_V1 ] :
640
640
max_seqlen = (cu_seqlens [1 :] - cu_seqlens [:- 1 ]).max ().item ()
641
641
elif self .attn_backend == _Backend .XFORMERS :
642
642
seqlens = (cu_seqlens [1 :] - cu_seqlens [:- 1 ]).tolist ()
You can’t perform that action at this time.
0 commit comments