
Commit be4470c

Update moe.py
1 parent ad1d270 commit be4470c

File tree

1 file changed: +2 -2 lines changed

src/MaxText/layers/moe.py

Lines changed: 2 additions & 2 deletions
@@ -579,7 +579,7 @@ def unpermute(
     if self.config.decoder_block == ctypes.DecoderBlockType.LLAMA4:
       # For Llama4, combine using weights of 1 for selected experts
       reshaped_weights = jnp.ones_like(reshaped_weights)
-    if self.config.weight_sum_fp32:
+    if self.config.float32_weight_sum:
       reshaped_intermediate = reshaped_intermediate.astype(jnp.float32)
       reshaped_weights = reshaped_weights.astype(jnp.float32)
     output = jnp.einsum(
@@ -1676,7 +1676,7 @@ def dense_matmul(
     with jax.named_scope("w_sum"):
       if is_llama4_decoder_layer:
         weights = self.reshape_and_update_weights(jnp.ones_like(top_k_weights), top_k_indices)
-      if self.config.weight_sum_fp32:
+      if self.config.float32_weight_sum:
         intermediate_layer = intermediate_layer.astype(jnp.float32)
         weights = weights.astype(jnp.float32)
         # cast to f32 for sum up in einsum op
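Both hunks rename the config flag weight_sum_fp32 to float32_weight_sum; the guarded behavior is unchanged. The sketch below is a minimal, self-contained illustration of the pattern the flag controls, not MaxText's actual code: the function name, shapes, and default flag value are assumptions for illustration. It upcasts expert outputs and routing weights to float32 so the einsum that sums over the top-k experts does not accumulate in bfloat16.

import jax.numpy as jnp

def combine_expert_outputs(intermediate, weights, float32_weight_sum=True):
  # Hypothetical shapes: intermediate is (tokens, top_k, model_dim) expert
  # outputs (e.g. bfloat16); weights is (tokens, top_k) routing weights.
  out_dtype = intermediate.dtype
  if float32_weight_sum:
    # Cast both operands so the einsum reduction runs in float32,
    # mirroring the astype(jnp.float32) calls guarded in the diff above.
    intermediate = intermediate.astype(jnp.float32)
    weights = weights.astype(jnp.float32)
  # Weighted sum over the top_k expert axis.
  output = jnp.einsum("tkd,tk->td", intermediate, weights)
  # Return in the original activation dtype for downstream layers.
  return output.astype(out_dtype)

x = jnp.ones((4, 2, 8), dtype=jnp.bfloat16)    # (tokens, top_k, dim)
w = jnp.full((4, 2), 0.5, dtype=jnp.bfloat16)  # (tokens, top_k)
print(combine_expert_outputs(x, w).shape)      # (4, 8)

Accumulating the weighted sum in float32 and casting back afterward trades a little extra compute for a numerically stabler combine, which matters when routing weights are small and activations are kept in a low-precision dtype.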
