Commit 6b6e30a

[gaudi] Fix the Llama-4-Maverick-17B-128E crash issue (#3246)
Signed-off-by: yuanwu <[email protected]>
1 parent: 70217ac

File tree

1 file changed (+1, -2 lines)


backends/gaudi/server/text_generation_server/models/custom_modeling/flash_llama4_modeling.py

Lines changed: 1 addition & 2 deletions
@@ -48,7 +48,6 @@
 )
 from text_generation_server.models.custom_modeling.flash_llama_modeling import (
     FlashLlamaAttention,
-    LlamaMLP,
 )
 
 
@@ -444,7 +443,7 @@ def __init__(self, prefix, config, weights, layer_idx):
         if self.is_moe_layer:  # the 128E model interleaves dense / sparse
             self.feed_forward = Llama4TextMoe(f"{prefix}.feed_forward", config, weights)
         else:
-            self.feed_forward = LlamaMLP(f"{prefix}.feed_forward", config, weights)
+            self.feed_forward = Llama4TextMLP(f"{prefix}.feed_forward", config, weights)
 
         self.input_layernorm = FastRMSNorm.load(
             prefix=f"{prefix}.input_layernorm",
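Context for the one-line fix: in the Llama-4-Maverick-17B-128E text model, decoder layers alternate between sparse MoE feed-forward blocks and dense MLPs. The dense branch was previously built with LlamaMLP from the Llama-3-style flash_llama_modeling file, presumably mismatching the Llama 4 checkpoint; the patch switches it to the Llama 4 MLP defined alongside Llama4TextMoe. The sketch below illustrates only the interleaving shape the diff touches. It is a minimal stand-in, not the TGI implementation: the module bodies, sizes, and the interleave rule are assumptions for illustration.

# Minimal sketch of the dense / sparse interleaving the diff touches.
# Module bodies are stand-ins; only the branch selection mirrors the patch.
import torch
import torch.nn as nn


class Llama4TextMLP(nn.Module):
    """Stand-in for the Llama-4 dense MLP (gate/up/down projections)."""

    def __init__(self, hidden_size: int, intermediate_size: int):
        super().__init__()
        self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.down_proj(nn.functional.silu(self.gate_proj(x)) * self.up_proj(x))


class Llama4TextMoe(nn.Module):
    """Stand-in for the sparse MoE block: a router plus expert MLPs."""

    def __init__(self, hidden_size: int, intermediate_size: int, num_experts: int):
        super().__init__()
        self.router = nn.Linear(hidden_size, num_experts, bias=False)
        self.experts = nn.ModuleList(
            Llama4TextMLP(hidden_size, intermediate_size) for _ in range(num_experts)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Soft routing with dense dispatch: fine for a sketch, not for production.
        weights = self.router(x).softmax(dim=-1)                    # (..., num_experts)
        out = torch.stack([e(x) for e in self.experts], dim=-1)    # (..., hidden, num_experts)
        return (out * weights.unsqueeze(-2)).sum(dim=-1)


def build_feed_forward(layer_idx: int, interleave_step: int = 2) -> nn.Module:
    # Mirrors the patched branch: some layers are sparse MoE, the rest dense,
    # and the dense branch now uses the Llama-4 MLP, not the Llama-3 LlamaMLP.
    # Assumption: the real rule comes from the model config, not this modulo.
    is_moe_layer = (layer_idx + 1) % interleave_step == 0
    if is_moe_layer:
        return Llama4TextMoe(hidden_size=128, intermediate_size=256, num_experts=4)
    return Llama4TextMLP(hidden_size=128, intermediate_size=256)

In the real model the dense/sparse choice comes from the config (via self.is_moe_layer) and Llama4TextMoe routes tokens across the 128 experts; the stand-in above keeps only the branching shape, which is where the wrong MLP class was being instantiated.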
