[WIP] RL multinode #152

Open · wants to merge 23 commits into main
Changes from 6 commits
2 changes: 1 addition & 1 deletion conf/rl_gsm8k.yaml
@@ -42,7 +42,7 @@ vllm_config:
    # VLLM get log probs OOM https://github.com/vllm-project/vllm/issues/5907
    --enable-chunked-prefill: ""

-output_dir: outputs/rl_gsm8k_deepspeed
+output_dir: outputs/rl_gsm8k_deepspeed_llama31_70b_new_save_rl_no_kl_lr1e-6_1node
accelerate_cfg_path: conf/accelerate/accelerate_base.yaml
use_deepspeed: false
25 changes: 25 additions & 0 deletions conf/rl_llama31_70b.yaml
@@ -0,0 +1,25 @@
defaults:
  - rl_gsm8k
  - _self_

finetune:
  rl:
    algo: reinforce
    kl_coef: 0.0
    reward_minus_kl_coef: 0.0
    use_advantages: false
    relu_log_p_weights: true
  train_batch_size: 4
  gradient_accumulation_passes: 16
  learning_rate: 1e-6
force_restart: true
max_agent_forks: 5000
model_path: /mnt/llmd/base_models/Meta-Llama-3.1-70B-Instruct
n_workers_per_gpu: 16
get_logprobs_workers_per_gpu: 1
gpus_per_model_instance: 4
use_rejection_sampling: true
test_every_n_iterations: 10
attempts: 8
dataset_name: gsm8k
use_deepspeed: true
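Not part of the diff, but a quick way to sanity-check how this file composes on top of rl_gsm8k (assuming Hydra and OmegaConf are installed and the snippet is run from the repo root, next to conf/):

# Sketch only: compose the config and inspect the overrides introduced by this file.
from hydra import compose, initialize
from omegaconf import OmegaConf

with initialize(version_base=None, config_path="conf"):
    cfg = compose(config_name="rl_llama31_70b")
    print(OmegaConf.to_yaml(cfg.finetune.rl))  # algo: reinforce, kl_coef: 0.0, ...
    print(cfg.use_deepspeed)  # True (overridden from false in rl_gsm8k.yaml)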
File renamed without changes.
27 changes: 18 additions & 9 deletions examples/rl_gsm8k/orchestrate_rl.py
@@ -315,9 +315,21 @@ def main(cfg: DictConfig):
    setup_logging(exp_path)
    logger.info(f"Current dir: {os.getcwd()}, output dir: {cfg.output_dir}")
    cfg.finetune.wandb_id = exp_path.name
-    run = init_wandb(cfg, exp_path, flatten_dict_config(cfg))
-    if run is None:
-        raise ValueError("Failed to initialize wandb run")
+
+    # Initialize wandb and handle failure gracefully
+    try:
+        run = init_wandb(cfg, exp_path, flatten_dict_config(cfg))
+    except Exception as e:
+        logger.warning(f"Failed to initialize wandb: {e}. Continuing without wandb logging.")
+        run = None
+
+    def safe_wandb_log(metrics, step):
+        if run is not None:
+            try:
+                wandb.log(metrics, step=step)
+            except Exception as e:
+                logger.warning(f"Failed to log to wandb: {e}")
+
    state_path = exp_path / "rl_state.json"
    state = load_state(state_path)
    # optionally clean all data at start time

Collaborator (on `if run is not None:`): Can we just check the rank there and do nothing unless it is 0?
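For reference, a minimal sketch of the rank check suggested above, reusing the RANK environment variable the rest of this PR relies on (a suggestion only, not what the branch currently does):

import logging
import os

import wandb

logger = logging.getLogger(__name__)


def safe_wandb_log(metrics: dict, step: int) -> None:
    # Only global rank 0 talks to wandb; every other rank returns immediately.
    if int(os.environ.get("RANK", 0)) != 0:
        return
    try:
        wandb.log(metrics, step=step)
    except Exception as e:
        logger.warning(f"Failed to log to wandb: {e}")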
@@ -429,10 +441,7 @@ def main(cfg: DictConfig):
            time_evaluation = stats["execution_time/test_make_data"]
        else:
            time_evaluation = 0
-        wandb.log(
-            stats,
-            step=state["iteration"],
-        )
+        safe_wandb_log(stats, step=state["iteration"])

        start_basemodel_logprobs = time.time()
        try:
@@ -472,7 +481,7 @@ def main(cfg: DictConfig):
            raise e

        time_populating_ref_logprobs = time.time() - start_basemodel_logprobs
-        wandb.log(
+        safe_wandb_log(
            {
                "execution_time/populating_ref_logprobs": time_populating_ref_logprobs,
                "execution_time/starting_assistantmodel_vllm": assistant_vllm_stats["starting_time"],
@@ -512,7 +521,7 @@ def main(cfg: DictConfig):
        )
        time_finetune = time.time() - start_finetune
        time_iteration = time.time() - start_iteration
-        wandb.log(
+        safe_wandb_log(
            {
                "execution_time/finetune": time_finetune,
                "execution_time/iteration": time_iteration,
34 changes: 32 additions & 2 deletions examples/rl_gsm8k/utils.py
@@ -69,6 +69,11 @@ def __init__(
        self.stderr_file: Optional[TextIO] = None
        self.stats = {}

        # Add node rank awareness
        self.node_rank = int(os.environ.get("RANK", 0))
        self.port_offset = self.node_rank * 1000  # Ensure different port ranges for each node
        self.port = port + self.port_offset

    def get_base_urls(self) -> list[str]:
        return [
            f"http://127.0.0.1:{port}" for port in self.ports

Collaborator (on the port offset): Why different port ranges for each node?

Collaborator (author): Theoretically we should be able to update the sync port if the one selected by the toolkit environment is already in use. But that turned out not to be the case, and I found out that the real reason for those clashes is the subprocess subshell.

@rizar (Collaborator, Dec 21, 2024): I'm not sure I understand. These vLLMs are running on different nodes, aren't they?
@@ -133,9 +138,9 @@ def _start_service(self) -> None:

        threads = []

-        for i, device_number in enumerate(generate_cuda_device_strings(torch.cuda.device_count(), self.gpus_per_model_instance )):
+        for i, device_number in enumerate(generate_cuda_device_strings(torch.cuda.device_count(), self.gpus_per_model_instance)):
+            # Adjust port based on both node rank and GPU index
            port = self.port + i
-            # start_llm(device_number, port, assistant_procs, ports)
            thread = threading.Thread(target=self._start_llm, args=(device_number, port))
            threads.append(thread)
            thread.start()
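generate_cuda_device_strings is defined elsewhere in utils.py and not shown in this diff; purely as an illustration of how the loop consumes it (one comma-separated CUDA device string per model instance), a hypothetical version might look like:

def generate_cuda_device_strings(num_gpus: int, gpus_per_instance: int) -> list[str]:
    # Hypothetical sketch: e.g. num_gpus=8, gpus_per_instance=4 -> ["0,1,2,3", "4,5,6,7"]
    return [
        ",".join(str(g) for g in range(start, start + gpus_per_instance))
        for start in range(0, num_gpus - gpus_per_instance + 1, gpus_per_instance)
    ]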
@@ -354,8 +359,19 @@ def launch_training(
        ValueError: If no GPUs are available
        RuntimeError: If training process fails
    """
    # environment variables
    GLOBAL_RANK = int(os.environ.get("RANK", 0))
    MASTER_PORT = int(os.environ.get("MASTER_PORT"))
    MASTER_ADDRESS = os.environ.get("MASTER_ADDR")
    # this is the same as number_of_replicas
    WORLD_SIZE = int(os.environ.get("WORLD_SIZE", 2))

    # Check GPU availability
    num_gpus = torch.cuda.device_count()
    print('###############################')
    print(f"Number of GPUs: {num_gpus}")
    print('###############################')
    is_multinode = num_gpus > 8
    if num_gpus == 0:
        raise ValueError("No GPUs available for finetuning")

Collaborator (on the print statements): Please use logger.info, and maybe a message like "I'm rank X, training on Y GPUs".

Collaborator (author): That's just sanity checking and it will be deleted soon. It was there to check that this number matches Num processes, but things are clear now.
@@ -381,6 +397,20 @@
            "--deepspeed_config_file",
            "conf/accelerate/deepspeed_stage3_bf16.json",
        ]
        if is_multinode:
            base_cmd.extend([
                "--num_machines",
                str(WORLD_SIZE),
                "--machine_rank",
                str(GLOBAL_RANK),
                "--main_process_ip",
                MASTER_ADDRESS,
                "--main_process_port",
                str(MASTER_PORT),
                "--deepspeed_multinode_launcher",
                "standard",
                "--same_network",
            ])
    else:
        base_cmd[2:2] = [
            "--multi_gpu",

Collaborator (on the is_multinode branch): Would multi-node training with accelerate work without DeepSpeed? If not, we should add an exception.
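A sketch of the guard the comment above asks for, assuming a use_deepspeed flag (name assumed here) is in scope where base_cmd is built:

def check_multinode_prerequisites(is_multinode: bool, use_deepspeed: bool, num_gpus: int) -> None:
    # Sketch of the suggested exception: fail fast instead of silently launching
    # without the multi-node flags, which are only added on the DeepSpeed path above.
    if is_multinode and not use_deepspeed:
        raise RuntimeError(
            f"Detected {num_gpus} GPUs spanning multiple machines, but multi-node launcher "
            "flags are only passed with DeepSpeed; set use_deepspeed=true for multi-node runs."
        )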
10 changes: 7 additions & 3 deletions tapeagents/finetune/logging_.py
@@ -18,12 +18,16 @@ def init_wandb(
    cfg: DictConfig,
    run_dir: Path,
    config_for_wandb: DictConfig | dict,
-) -> wandb_run.Run:
-    """Initialize W&B.
+) -> wandb_run.Run | None:
+    """Initialize W&B on the main process only.

    config_for_wandb is the configuration that will be logged to W&B.
+
+    Returns None if not on main process.
    """
+    # Only initialize on main process (rank 0)
+    if os.environ.get('RANK', '0') != '0':
+        return None
+
    if config_for_wandb is None:
        config_for_wandb = cfg.dict()
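As an aside on the rank gating: an alternative to reading RANK directly, offered only as a suggestion rather than something this PR uses, is accelerate's PartialState, which degrades gracefully when no distributed environment is initialized:

from accelerate import PartialState


def is_main_process() -> bool:
    # Falls back to a single-process state (rank 0) when torch.distributed is not initialized.
    return PartialState().is_main_process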