
fix(isp.py): fix isp overlap backward allgather twice when activation ckpt 0.x (#366)

huangting4201 authored Nov 29, 2024
1 parent 86c3b6c · commit 4a6b453
Showing 6 changed files with 8 additions and 8 deletions.
internlm/core/parallel/comm/isp.py (2 changes: 1 addition & 1 deletion)

@@ -488,8 +488,8 @@ def _pre_forward_hook_for_module(self, module: nn.Module, *args):  # pylint: disable=W0613
         self._wait_handle(module)
 
     def _post_forward_hook_for_module(self, module: nn.Module, *args):  # pylint: disable=W0613
-        self._clear_handle(module)
         if not ((self._module_to_index[module] < self._ckpt_block_num) and self.is_forward is False):
+            self._clear_handle(module)
             self._clear_weight(module)
 
     def _pre_backward_hook_for_module(self, module: nn.Module, *args):  # pylint: disable=W0613
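Why the hook change matters: with a fractional activation-checkpoint ratio (the "ckpt 0.x" in the title), the first ckpt_block_num blocks run their forward twice, once in the real forward pass and once as recomputation when backward reaches them. The old code cleared the weight all-gather handle unconditionally in the post-forward hook, so the recomputation pass threw away the gathered weights and backward had to all-gather them a second time. The sketch below restates the corrected hook in isolation; the class name and stub methods are illustrative, and only _post_forward_hook_for_module follows the diff.

import torch.nn as nn

class ISPOverlapSketch:
    """Illustrative stand-in for the ISP overlap handler in isp.py."""

    def __init__(self, ckpt_block_num: int):
        self._ckpt_block_num = ckpt_block_num  # how many leading blocks use activation ckpt
        self._module_to_index = {}             # nn.Module -> block index
        self.is_forward = True                 # False while backward triggers recomputation

    def _clear_handle(self, module: nn.Module):
        """Release the module's pending all-gather handle (stub)."""

    def _clear_weight(self, module: nn.Module):
        """Free the module's gathered full weight (stub)."""

    def _post_forward_hook_for_module(self, module: nn.Module, *args):
        # Skip cleanup for the recomputation forward of a checkpointed block:
        # backward runs immediately afterwards and reuses the gathered weight,
        # so clearing here would trigger a second all-gather.
        if not ((self._module_to_index[module] < self._ckpt_block_num) and self.is_forward is False):
            self._clear_handle(module)
            self._clear_weight(module)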
internlm/model/modeling_internlm.py (2 changes: 1 addition & 1 deletion)

@@ -138,7 +138,7 @@ def __init__(
             mlp_layer_fusion=mlp_layer_fusion,
             multiple_of=multiple_of,
             # TODO: to support more activation functions
-            activation_type="swiglu" if use_swiglu else "swiglu",
+            activation_type="swiglu" if use_swiglu else "gelu",
         )
 
         self.use_swiglu = use_swiglu
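The other five files share a single bug: both arms of the conditional expression were "swiglu", so use_swiglu=False was silently ignored and a swiglu MLP was built anyway. The fix restores gelu as the fallback; the identical one-line change is repeated in modeling_internlm2.py, modeling_llama.py, modeling_mixtral.py, and modeling_moe.py below. In isolation:

use_swiglu = False

# before the fix: both branches identical, the flag had no effect
activation_type = "swiglu" if use_swiglu else "swiglu"
assert activation_type == "swiglu"  # gelu was requested but never selected

# after the fix: the flag actually picks the activation
activation_type = "swiglu" if use_swiglu else "gelu"
assert activation_type == "gelu"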
internlm/model/modeling_internlm2.py (2 changes: 1 addition & 1 deletion)

@@ -165,7 +165,7 @@ def __init__(
             mlp_layer_fusion=mlp_layer_fusion,
             multiple_of=multiple_of,
             # TODO: to support more activation functions
-            activation_type="swiglu" if use_swiglu else "swiglu",
+            activation_type="swiglu" if use_swiglu else "gelu",
         )
 
         self.use_swiglu = use_swiglu
internlm/model/modeling_llama.py (2 changes: 1 addition & 1 deletion)

@@ -157,7 +157,7 @@ def __init__(
             mlp_layer_fusion=mlp_layer_fusion,
             multiple_of=multiple_of,
             # TODO: to support more activation functions
-            activation_type="swiglu" if use_swiglu else "swiglu",
+            activation_type="swiglu" if use_swiglu else "gelu",
         )
 
         self.use_swiglu = use_swiglu
internlm/model/modeling_mixtral.py (4 changes: 2 additions & 2 deletions)

@@ -138,7 +138,7 @@ def __init__(
                 mlp_layer_fusion=mlp_layer_fusion,
                 multiple_of=multiple_of,
                 # TODO: to support more activation functions
-                activation_type="swiglu" if use_swiglu else "swiglu",
+                activation_type="swiglu" if use_swiglu else "gelu",
             )
         else:
             # replace mlp by MoE module. The expert in MoE is a FeedForward module.

@@ -156,7 +156,7 @@ def __init__(
                 mlp_layer_fusion=mlp_layer_fusion,
                 multiple_of=multiple_of,
                 # TODO: to support more activation functions
-                activation_type="swiglu" if use_swiglu else "swiglu",
+                activation_type="swiglu" if use_swiglu else "gelu",
             )
 
         self.use_swiglu = use_swiglu
internlm/model/modeling_moe.py (4 changes: 2 additions & 2 deletions)

@@ -129,7 +129,7 @@ def __init__(
                 mlp_layer_fusion=mlp_layer_fusion,
                 multiple_of=multiple_of,
                 # TODO: to support more activation functions
-                activation_type="swiglu" if use_swiglu else "swiglu",
+                activation_type="swiglu" if use_swiglu else "gelu",
             )
         else:
             # replace mlp by MoE module. The expert in MoE is a FeedForward module.

@@ -147,7 +147,7 @@ def __init__(
                 mlp_layer_fusion=mlp_layer_fusion,
                 multiple_of=multiple_of,
                 # TODO: to support more activation functions
-                activation_type="swiglu" if use_swiglu else "swiglu",
+                activation_type="swiglu" if use_swiglu else "gelu",
             )
 
         self.use_swiglu = use_swiglu
