1 change: 1 addition & 0 deletions README.md
@@ -162,6 +162,7 @@ LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by
 <li>Qwen-VL (7B)</li>
 <li>Qwen2-VL (2B, 7B, 72B)</li>
 <li>Qwen2.5-VL (3B, 7B, 72B)</li>
+<li>Qwen3-VL (2B - 235B)</li>
 <li>DeepSeek-VL (7B)</li>
 <li>DeepSeek-VL2 (3B, 16B, 27B)</li>
 <li>InternVL-Chat (v1.1-v1.5)</li>
1 change: 1 addition & 0 deletions README_ja.md
@@ -148,6 +148,7 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
 <li>Qwen-VL (7B)</li>
 <li>Qwen2-VL (2B, 7B, 72B)</li>
 <li>Qwen2.5-VL (3B, 7B, 72B)</li>
+<li>Qwen3-VL (2B - 235B)</li>
 <li>DeepSeek-VL (7B)</li>
 <li>DeepSeek-VL2 (3B, 16B, 27B)</li>
 <li>InternVL-Chat (v1.1-v1.5)</li>
1 change: 1 addition & 0 deletions README_zh-CN.md
@@ -163,6 +163,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
 <li>Qwen-VL (7B)</li>
 <li>Qwen2-VL (2B, 7B, 72B)</li>
 <li>Qwen2.5-VL (3B, 7B, 72B)</li>
+<li>Qwen3-VL (2B - 235B)</li>
 <li>DeepSeek-VL (7B)</li>
 <li>DeepSeek-VL2 (3B, 16B, 27B)</li>
 <li>InternVL-Chat (v1.1-v1.5)</li>
1 change: 1 addition & 0 deletions docs/en/supported_models/supported_models.md
@@ -87,6 +87,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 | Qwen3 | 0.6B - 235B | LLM | Yes | Yes | Yes\* | - | Yes\* |
 | QWen2-VL | 2B, 7B | MLLM | Yes | Yes | No | No | Yes |
 | QWen2.5-VL | 3B - 72B | MLLM | Yes | No | No | No | No |
+| QWen3-VL | 2B - 235B | MLLM | Yes | No | No | No | No |
 | DeepSeek-MoE | 16B | LLM | Yes | No | No | No | No |
 | DeepSeek-V2 | 16B, 236B | LLM | Yes | No | No | No | No |
 | DeepSeek-V2.5 | 236B | LLM | Yes | No | No | No | No |
1 change: 1 addition & 0 deletions docs/zh_cn/supported_models/supported_models.md
@@ -87,6 +87,7 @@
 | Qwen3 | 0.6B - 235B | LLM | Yes | Yes | Yes\* | - | Yes |
 | QWen2-VL | 2B, 7B | MLLM | Yes | Yes | No | No | Yes |
 | QWen2.5-VL | 3B - 72B | MLLM | Yes | No | No | No | No |
+| QWen3-VL | 2B - 235B | MLLM | Yes | No | No | No | No |
 | DeepSeek-MoE | 16B | LLM | Yes | No | No | No | No |
 | DeepSeek-V2 | 16B, 236B | LLM | Yes | No | No | No | No |
 | DeepSeek-V2.5 | 236B | LLM | Yes | No | No | No | No |
6 changes: 3 additions & 3 deletions lmdeploy/archs.py
@@ -109,9 +109,9 @@ def check_vl_llm(config: dict) -> bool:
         'LlavaLlamaForCausalLM', 'LlavaMistralForCausalLM', 'CogVLMForCausalLM', 'InternLMXComposer2ForCausalLM',
         'InternVLChatModel', 'MiniCPMV', 'LlavaForConditionalGeneration', 'LlavaNextForConditionalGeneration',
         'Phi3VForCausalLM', 'Qwen2VLForConditionalGeneration', 'Qwen2_5_VLForConditionalGeneration',
-        'MllamaForConditionalGeneration', 'MolmoForCausalLM', 'Gemma3ForConditionalGeneration',
-        'Llama4ForConditionalGeneration', 'InternVLForConditionalGeneration', 'InternS1ForConditionalGeneration',
-        'Glm4vForConditionalGeneration'
+        'Qwen3VLForConditionalGeneration', 'Qwen3VLMoeForConditionalGeneration', 'MllamaForConditionalGeneration',
+        'MolmoForCausalLM', 'Gemma3ForConditionalGeneration', 'Llama4ForConditionalGeneration',
+        'InternVLForConditionalGeneration', 'InternS1ForConditionalGeneration', 'Glm4vForConditionalGeneration'
     ])
     if arch == 'QWenLMHeadModel' and 'visual' in config:
         return True
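The two new architecture names let `check_vl_llm` classify Qwen3-VL checkpoints as vision-language models. A minimal sketch of how such a check is driven, reading `architectures` from a checkpoint's config.json; the helper below is illustrative, not LMDeploy API:

```python
import json
import os

# Abridged version of the supported set above.
QWEN3_VL_ARCHS = {'Qwen3VLForConditionalGeneration', 'Qwen3VLMoeForConditionalGeneration'}

def looks_like_qwen3_vl(model_path: str) -> bool:
    """Hypothetical helper: check the first architecture name in config.json."""
    with open(os.path.join(model_path, 'config.json')) as f:
        config = json.load(f)
    arch = config.get('architectures', [''])[0]
    return arch in QWEN3_VL_ARCHS
```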
12 changes: 10 additions & 2 deletions lmdeploy/pytorch/config.py
@@ -28,9 +28,17 @@ def _update_torch_dtype(config: 'ModelConfig', dtype: str):
         config.dtype = torch.float16
         return config

-    torch_dtype = getattr(config.hf_config, 'dtype', None)
+    language_hf_config = config.hf_config
+
+    # for multi-modal models, get the language model config to determine dtype
+    if hasattr(config.hf_config, 'text_config'):
+        language_hf_config = config.hf_config.text_config
+    elif hasattr(config.hf_config, 'llm_config'):
+        language_hf_config = config.hf_config.llm_config
+
+    torch_dtype = getattr(language_hf_config, 'dtype', None)
     if torch_dtype is None:
-        torch_dtype = getattr(config.hf_config, 'torch_dtype', None)
+        torch_dtype = getattr(language_hf_config, 'torch_dtype', None)

     # deal with case when torch_dtype is not string but torch.dtype
     if isinstance(torch_dtype, torch.dtype):
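The indirection matters because multi-modal checkpoints nest the language model's settings under `text_config` (or `llm_config`), so a top-level `torch_dtype` lookup can come back `None`. A self-contained illustration with stand-in config objects rather than the real config classes:

```python
from types import SimpleNamespace

# Stand-in for a Qwen3-VL-style config: the dtype lives on the nested text_config.
hf_config = SimpleNamespace(text_config=SimpleNamespace(torch_dtype='bfloat16'))

language_hf_config = hf_config
if hasattr(hf_config, 'text_config'):
    language_hf_config = hf_config.text_config
elif hasattr(hf_config, 'llm_config'):
    language_hf_config = hf_config.llm_config

print(getattr(hf_config, 'torch_dtype', None))           # None: top level misses it
print(getattr(language_hf_config, 'torch_dtype', None))  # 'bfloat16'
```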
8 changes: 8 additions & 0 deletions lmdeploy/pytorch/configurations/default.py
@@ -14,8 +14,16 @@ def condition(cls, hf_config):
     @classmethod
     def build(cls, hf_config, model_path: str = None, **kwargs):
         """build."""
+
+        # for multi-modal models, get the language model config to build model config
+        if hasattr(hf_config, 'text_config'):
+            hf_config = hf_config.text_config
+        elif hasattr(hf_config, 'llm_config'):
+            hf_config = hf_config.llm_config
+
         head_dim = getattr(hf_config, 'head_dim', None)
         head_dim = head_dim or hf_config.hidden_size // hf_config.num_attention_heads
+
         # head_dim should not be None
         hf_config.head_dim = head_dim
         num_attention_heads = hf_config.num_attention_heads
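With the language config unwrapped, the existing `head_dim` fallback applies unchanged: configs that omit `head_dim` get it derived from `hidden_size` and `num_attention_heads`. A worked instance with assumed, illustrative values:

```python
from types import SimpleNamespace

# Illustrative sizes only; real checkpoints define their own.
hf_config = SimpleNamespace(hidden_size=4096, num_attention_heads=32)

head_dim = getattr(hf_config, 'head_dim', None)
head_dim = head_dim or hf_config.hidden_size // hf_config.num_attention_heads
assert head_dim == 128  # 4096 // 32
```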
3 changes: 2 additions & 1 deletion lmdeploy/pytorch/engine/model_agent.py
@@ -248,7 +248,8 @@ def model_forward(
     output = model(**input_dict)

     # InternVL-3.5-Flash will change the seqlen, model_metas during forward
-    model_metas = context.model_metas
+    if context.model_metas is not None and context.model_metas[0] is not None:
+        model_metas = context.model_metas
     seq_length = context.q_seqlens[:len(inputs.seq_length)]

     return dict(hidden_states=output, model_metas=model_metas, seq_length=seq_length)
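The guard keeps models that never populate per-sequence metas (the common case, where the context holds a list of `None`s) from clobbering the metas that arrived with the inputs; per the comment, only models such as InternVL-3.5-Flash rewrite them during forward. A toy illustration with made-up values:

```python
# Made-up metas, purely for illustration of the overwrite guard.
model_metas = [{'stage': 'prefill'}]  # metas carried in with the inputs
context_model_metas = [None]          # typical context for most models

if context_model_metas is not None and context_model_metas[0] is not None:
    model_metas = context_model_metas

print(model_metas)  # [{'stage': 'prefill'}] -- left intact
```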
12 changes: 12 additions & 0 deletions lmdeploy/pytorch/models/module_map.py
@@ -147,6 +147,18 @@
     f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration',
 })

+# qwen3_vl
+MODULE_MAP.update({
+    'Qwen3VLForConditionalGeneration':
+    f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_vl.Qwen3VLForConditionalGeneration',
+})
+
+# qwen3_vl_moe
+MODULE_MAP.update({
+    'Qwen3VLMoeForConditionalGeneration':
+    f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_vl_moe.Qwen3VLMoeForConditionalGeneration',
+})
+
 # starcoder2
 MODULE_MAP.update({
     'Starcoder2ForCausalLM': f'{LMDEPLOY_PYTORCH_MODEL_PATH}.starcoder2.Starcoder2ForCausalLM',
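These entries map a Hugging Face architecture name to the dotted path of LMDeploy's rewritten implementation. A sketch of how such a registry is typically resolved; the `resolve` helper here is illustrative, and the actual loading code lives elsewhere in lmdeploy.pytorch:

```python
import importlib

def resolve(module_map: dict, arch: str):
    """Import and return the class registered for an architecture name."""
    dotted_path = module_map[arch]
    module_name, _, class_name = dotted_path.rpartition('.')
    return getattr(importlib.import_module(module_name), class_name)

# e.g. resolve(MODULE_MAP, 'Qwen3VLForConditionalGeneration') would import
# lmdeploy.pytorch.models.qwen3_vl and return Qwen3VLForConditionalGeneration.
```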
2 changes: 1 addition & 1 deletion lmdeploy/pytorch/models/qwen3.py
@@ -47,7 +47,7 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:
             head_dim,
             num_kv_heads=num_key_value_heads,
             v_head_size=head_dim,
-            sliding_window=config.sliding_window,
+            sliding_window=getattr(config, 'sliding_window', None),
         )

         # o_proj
2 changes: 1 addition & 1 deletion lmdeploy/pytorch/models/qwen3_moe.py
@@ -52,7 +52,7 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:
             head_dim,
             num_kv_heads=num_key_value_heads,
             v_head_size=head_dim,
-            sliding_window=config.sliding_window,
+            sliding_window=getattr(config, 'sliding_window', None),
         )

         # o_proj
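Both this change and the matching one in qwen3.py guard the same failure mode: Qwen3-VL reuses the Qwen3 decoder blocks, but its nested text config may not define `sliding_window` at all, so direct attribute access would raise `AttributeError`. A quick illustration with stand-in configs:

```python
from types import SimpleNamespace

qwen3_cfg = SimpleNamespace(sliding_window=None)  # attribute present
qwen3_vl_text_cfg = SimpleNamespace()             # assumed: attribute absent

for cfg in (qwen3_cfg, qwen3_vl_text_cfg):
    # getattr with a default tolerates both shapes; cfg.sliding_window
    # would raise AttributeError on the second config.
    print(getattr(cfg, 'sliding_window', None))
```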