
Commit

Update moe_layer.py
cosmosZhou authored Dec 22, 2024
1 parent 011e96d commit 8150c02
Showing 1 changed file with 1 addition and 0 deletions.
1 change: 1 addition & 0 deletions megatron_patch/model/qwen2/moe/moe_layer.py
@@ -100,6 +100,7 @@ def __init__(
)
self.shared_expert = MLP(self.config, mlpSubmodules, is_expert=False, is_shared_expert=True)
self.shared_expert_gate = torch.nn.Linear(config.hidden_size, 1, bias=False)
+ self.shared_expert_gate.weight.sequence_parallel = config.sequence_parallel

if self.config.moe_grouped_gemm:
if isinstance(self.submodules, MLPSubmodules):
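For context, the added attribute follows the Megatron-LM convention for sequence parallelism: the shared-expert gate is a plain torch.nn.Linear whose weight is replicated across tensor-parallel ranks but only sees each rank's slice of the sequence, so its gradient has to be summed across the tensor-model-parallel group after backward. Megatron locates such parameters by checking a sequence_parallel attribute on them, which is what this commit sets. A minimal sketch of that convention in isolation (the toy gate and the tp_group handle are assumptions for illustration, not code from this repository):

import torch
import torch.distributed as dist

# Tag the replicated weight, exactly as the diff above does for shared_expert_gate.
gate = torch.nn.Linear(1024, 1, bias=False)   # toy stand-in for shared_expert_gate
gate.weight.sequence_parallel = True          # attribute checked later during grad finalization

def allreduce_sequence_parallel_grads(module, tp_group):
    # Sketch of the finalization step run after backward: gradients of every
    # parameter tagged with sequence_parallel are summed across tensor-parallel
    # ranks, since each rank saw only part of the sequence.
    for param in module.parameters():
        if getattr(param, "sequence_parallel", False) and param.grad is not None:
            dist.all_reduce(param.grad, group=tp_group)

Without the tag, each rank would keep only its partial gradient for the gate weight and the replicated copies would drift apart during training.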
