Skip to content

Commit

Permalink
refine trainer_builder
Browse files Browse the repository at this point in the history
  • Loading branch information
zigzagcai committed Aug 11, 2024
1 parent 6a5b667 commit 7696aac
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions internlm/core/trainer_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ def _initialize_memory_profiler(self, model, optimizer, profiling) -> Optional[S
+ f"wp{gpc.get_local_rank(ParallelMode.WEIGHT)}_"
+ f"tp{gpc.get_local_rank(ParallelMode.TENSOR)}",
)
- return None
+ else:
+ return None

def _initialize_batch_skipper(self, train_state) -> BatchSkipper:
skip_batches = gpc.config.data.skip_batches
Expand Down Expand Up @@ -346,7 +347,11 @@ def _record_metrics(self, batch_count: int, batch, start_time, loss, moe_loss, s
)

def _should_evaluate(self) -> bool:
- return gpc.config.data.valid_every > 0 and self.train_state.step_count % gpc.config.data.valid_every == 0
+ return (
+ gpc.config.data.valid_every > 0
+ and self.train_state.step_count > 0
+ and self.train_state.step_count % gpc.config.data.valid_every == 0
+ )

def _evaluate(self):
evaluate_on_val_dls(
Expand Down

0 comments on commit 7696aac

Please sign in to comment.