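Try to support ETP (expert tensor parallelism): when `tp_size > 1`, sum the MoE GMM output across the "tensor" mesh axis with `jax.lax.psum`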

Prayer3th · Prayer3th · commit e65e46cd4fe5 · 2025-11-03T14:52:40.000+08:00
diff --git a/python/sgl_jax/srt/layers/moe.py b/python/sgl_jax/srt/layers/moe.py
@@ -380,6 +380,9 @@ def _gmm_compute_with_sharded_weights(
             tiling=tiling_down,
         )
 
+        if self.tp_size > 1:
+            intermediate_output = jax.lax.psum(intermediate_output, "tensor")
+
         return intermediate_output
 
     def _expert_all_to_all_dispatch(self, data, sorted_experts, expert_shard_id):

Original file line number	Diff line number	Diff line change
`@@ -380,6 +380,9 @@ def _gmm_compute_with_sharded_weights(`
`380`	`380`	`tiling=tiling_down,`
`381`	`381`	`)`
`382`	`382`
	`383`	`+ if self.tp_size > 1:`
	`384`	`+ intermediate_output = jax.lax.psum(intermediate_output, "tensor")`
	`385`	`+`
`383`	`386`	`return intermediate_output`
`384`	`387`
`385`	`388`	`def _expert_all_to_all_dispatch(self, data, sorted_experts, expert_shard_id):`