Skip to content

Commit

Permalink
Use the already-fused gate_up weights when fuse_mlp is enabled
Browse files Browse the repository at this point in the history
  • Loading branch information
jlonge4 committed Jan 21, 2025
1 parent 0b62ed7 commit 1c8a02c
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion optimum/neuron/models/phi4/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,9 @@ def load_weights(self):

# Note: Automatic MLP padding is safe since zeros are *only* introduced to intermediary state
if self.neuron_config.fuse_mlp:
assert fused_gate_up.shape[0] % self.config.tp_degree == 0, f"mlp weights are not divisible by tp_degree {self.config.tp_degree}"
assert (
fused_gate_up.shape[0] % self.config.tp_degree == 0
), f"mlp weights are not divisible by tp_degree {self.config.tp_degree}"
new_layer.add_mlp_input(fused_gate_up)
if self.neuron_config.mlp_out_weight_transpose:
new_layer.add_mlp_output(
Expand Down

0 comments on commit 1c8a02c

Please sign in to comment.