From 3cac3f5832b3c10fa4ec5aa608a8b415d30f0055 Mon Sep 17 00:00:00 2001 From: chenwenxiaolive Date: Mon, 9 Mar 2026 05:24:16 +0800 Subject: [PATCH 1/2] fix: pass rope_scaling=None to get_rope to avoid unhashable dict error Newer versions of transformers (>=5.1.0) initialize rope_scaling as a dict instead of None, which causes TypeError with @lru_cache since dicts are unhashable. Since RoPE scaling is not yet implemented, explicitly pass None to avoid the error. Fixes #167 Co-Authored-By: Claude Opus 4.6 --- nanovllm/models/qwen3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanovllm/models/qwen3.py b/nanovllm/models/qwen3.py index 5d39e0b90..a3ea74441 100755 --- a/nanovllm/models/qwen3.py +++ b/nanovllm/models/qwen3.py @@ -56,7 +56,7 @@ def __init__( rotary_dim=self.head_dim, max_position=max_position, base=rope_theta, - rope_scaling=rope_scaling, + rope_scaling=None, ) self.attn = Attention( self.num_heads, From b4e226eba7df812ced4754c5ef12d75b3d6b04b5 Mon Sep 17 00:00:00 2001 From: chenwenxiaolive Date: Sun, 29 Mar 2026 01:44:40 +0800 Subject: [PATCH 2/2] fix: normalize default rope scaling for qwen3 --- nanovllm/layers/rotary_embedding.py | 32 ++++++++++++++++++++++++++--- nanovllm/models/qwen3.py | 4 ++-- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/nanovllm/layers/rotary_embedding.py b/nanovllm/layers/rotary_embedding.py index 998d11646..489afa787 100644 --- a/nanovllm/layers/rotary_embedding.py +++ b/nanovllm/layers/rotary_embedding.py @@ -48,14 +48,40 @@ def forward( return query, key +def _normalize_rope_scaling( + rope_scaling: dict | None, +) -> None: + if rope_scaling is None: + return None + rope_type = rope_scaling.get("rope_type", rope_scaling.get("type")) + if rope_type == "default": + return None + if rope_type is None and set(rope_scaling).issubset({"rope_theta"}): + return None + raise NotImplementedError( + f"Unsupported rope_scaling={rope_scaling!r}. " + "nano-vllm only supports default RoPE without scaling." + ) + + @lru_cache(1) -def get_rope( +def _get_rope( head_size: int, rotary_dim: int, max_position: int, base: float, - rope_scaling: dict | None = None, + rope_scaling: None = None, ): - assert rope_scaling is None rotary_emb = RotaryEmbedding(head_size, rotary_dim, max_position, base) return rotary_emb + + +def get_rope( + head_size: int, + rotary_dim: int, + max_position: int, + base: float, + rope_scaling: dict | None = None, +): + rope_scaling = _normalize_rope_scaling(rope_scaling) + return _get_rope(head_size, rotary_dim, max_position, base, rope_scaling) diff --git a/nanovllm/models/qwen3.py b/nanovllm/models/qwen3.py index a3ea74441..71c3b6e5d 100755 --- a/nanovllm/models/qwen3.py +++ b/nanovllm/models/qwen3.py @@ -23,7 +23,7 @@ def __init__( rms_norm_eps: float = 1e-06, qkv_bias: bool = False, rope_theta: float = 10000, - rope_scaling: tuple | None = None, + rope_scaling: dict | None = None, ) -> None: super().__init__() tp_size = dist.get_world_size() @@ -56,7 +56,7 @@ def __init__( rotary_dim=self.head_dim, max_position=max_position, base=rope_theta, - rope_scaling=None, + rope_scaling=rope_scaling, ) self.attn = Attention( self.num_heads,