Skip to content

Commit

Permalink
fix(moe): change moe norm reduced group (#289)
Browse files Browse the repository at this point in the history
  • Loading branch information
blankde authored Jul 31, 2024
1 parent 57b7cd5 commit acfb11e
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion internlm/solver/optimizer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ def compute_norm(gradients, parameters, norm_type=2, zero_mode=ParallelMode.ZERO
# model and zero have been reduced!!!
if zero_mode == ParallelMode.EXPERT_DATA:
pg = gpc.get_group(ParallelMode.EXPERT)
- scaled_norm = total_norm * 1.0 / float(gpc.get_world_size(ParallelMode.DATA))
+ scaled_norm = total_norm * 1.0 / float(gpc.get_world_size(ParallelMode.EXPERT))
scaled_norm_tensor = torch.tensor(scaled_norm, device=get_current_device(), dtype=torch.float)
dist.all_reduce(scaled_norm_tensor, group=pg)
total_norm = scaled_norm_tensor.item()
Expand Down

0 comments on commit acfb11e

Please sign in to comment.