From aad0297ab4acd14b002118900c3db2bcede60aef Mon Sep 17 00:00:00 2001 From: allenzren Date: Fri, 3 Jan 2025 17:22:27 -0500 Subject: [PATCH 1/2] add separate eval model class that also initializes the pre-trained policy for early denoising steps --- README.md | 2 +- agent/finetune/train_agent.py | 2 +- model/diffusion/diffusion.py | 1 + model/diffusion/diffusion_eval_ft.py | 135 +++++++++++++++++++++++++++ 4 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 model/diffusion/diffusion_eval_ft.py diff --git a/README.md b/README.md index 2d64563..4c83167 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ See [here](cfg/finetuning.md) for details of the experiments in the paper. * Videos of trials in Robomimic tasks can be recorded by specifying `env.save_video=True`, `train.render.freq=`, and `train.render.num=` in fine-tuning configs. ## Usage - Evaluation -Pre-trained or fine-tuned policies can be evaluated without running the fine-tuning script now. Some example configs are provided under `cfg/{gym/robomimic/furniture}/eval}` including ones below. Set `base_policy_path` to override the default checkpoint. +Pre-trained or fine-tuned policies can be evaluated without running the fine-tuning script now. Some example configs are provided under `cfg/{gym/robomimic/furniture}/eval}` including ones below. `ft_denoising_steps` needs to match fine-tuning config. Set `base_policy_path` to override the default checkpoint. ```console python script/run.py --config-name=eval_diffusion_mlp \ --config-dir=cfg/gym/eval/hopper-v2 diff --git a/agent/finetune/train_agent.py b/agent/finetune/train_agent.py index 4c68b51..dce3d9b 100644 --- a/agent/finetune/train_agent.py +++ b/agent/finetune/train_agent.py @@ -128,7 +128,7 @@ def save_model(self): data = { "itr": self.itr, "model": self.model.state_dict(), - } + } # right now `model` includes weights for `network`, `actor`, `actor_ft`. 
Weights for `network` are redundant, and we can use `actor` weights as the base policy (earlier denoising steps) and `actor_ft` weights as the fine-tuned policy (later denoising steps) during evaluation. savepath = os.path.join(self.checkpoint_dir, f"state_{self.itr}.pt") torch.save(data, savepath) log.info(f"Saved model to {savepath}") diff --git a/model/diffusion/diffusion.py b/model/diffusion/diffusion.py index f2ad1aa..e70c46c 100644 --- a/model/diffusion/diffusion.py +++ b/model/diffusion/diffusion.py @@ -289,6 +289,7 @@ def forward(self, cond, deterministic=True): t=t_b, cond=cond, index=index_b, + deterministic=deterministic, ) std = torch.exp(0.5 * logvar) diff --git a/model/diffusion/diffusion_eval_ft.py b/model/diffusion/diffusion_eval_ft.py new file mode 100644 index 0000000..99a7938 --- /dev/null +++ b/model/diffusion/diffusion_eval_ft.py @@ -0,0 +1,135 @@ +""" +For evaluating RL fine-tuned diffusion policy + +Account for frozen base policy for early denoising steps and fine-tuned policy for later denoising steps + +""" + +import copy +import logging + +import torch + +log = logging.getLogger(__name__) + +from model.diffusion.diffusion import DiffusionModel +from model.diffusion.sampling import extract + + +class DiffusionEvalFT(DiffusionModel): + def __init__( + self, + use_ddim, + ft_denoising_steps, + network_path, + **kwargs, + ): + # do not let base class load model + super().__init__(use_ddim=use_ddim, network_path=None, **kwargs) + self.ft_denoising_steps = ft_denoising_steps + checkpoint = torch.load( + network_path, map_location=self.device, weights_only=True + ) # 'network.mlp_mean...', 'actor.mlp_mean...', 'actor_ft.mlp_mean...' + + # Set up base model --- technically not needed if all denoising steps are fine-tuned + self.actor = self.network + base_weights = { + key.split("actor.")[1]: checkpoint["model"][key] + for key in checkpoint["model"] + if "actor." 
in key + } + self.actor.load_state_dict(base_weights, strict=True) + logging.info("Loaded base policy weights from %s", network_path) + + # Always set up fine-tuned model + self.actor_ft = copy.deepcopy(self.network) + ft_weights = { + key.split("actor_ft.")[1]: checkpoint["model"][key] + for key in checkpoint["model"] + if "actor_ft." in key + } + self.actor_ft.load_state_dict(ft_weights, strict=True) + logging.info("Loaded fine-tuned policy weights from %s", network_path) + + # override + def p_mean_var( + self, + x, + t, + cond, + index=None, + deterministic=False, + ): + noise = self.actor(x, t, cond=cond) + if self.use_ddim: + ft_indices = torch.where( + index >= (self.ddim_steps - self.ft_denoising_steps) + )[0] + else: + ft_indices = torch.where(t < self.ft_denoising_steps)[0] + + # overwrite noise for fine-tuning steps + if len(ft_indices) > 0: + cond_ft = {key: cond[key][ft_indices] for key in cond} + noise_ft = self.actor_ft(x[ft_indices], t[ft_indices], cond=cond_ft) + noise[ft_indices] = noise_ft + + # Predict x_0 + if self.predict_epsilon: + if self.use_ddim: + """ + x₀ = (xₜ - √ (1-αₜ) ε )/ √ αₜ + """ + alpha = extract(self.ddim_alphas, index, x.shape) + alpha_prev = extract(self.ddim_alphas_prev, index, x.shape) + sqrt_one_minus_alpha = extract( + self.ddim_sqrt_one_minus_alphas, index, x.shape + ) + x_recon = (x - sqrt_one_minus_alpha * noise) / (alpha**0.5) + else: + """ + x₀ = √ (1/α̅ₜ) xₜ - √ (1/α̅ₜ - 1) ε + """ + x_recon = ( + extract(self.sqrt_recip_alphas_cumprod, t, x.shape) * x + - extract(self.sqrt_recipm1_alphas_cumprod, t, x.shape) * noise + ) + else: # directly predicting x₀ + x_recon = noise + if self.denoised_clip_value is not None: + x_recon.clamp_(-self.denoised_clip_value, self.denoised_clip_value) + if self.use_ddim: + # re-calculate noise based on clamped x_recon - default to false in HF, but let's use it here + noise = (x - alpha ** (0.5) * x_recon) / sqrt_one_minus_alpha + + # Clip epsilon for numerical stability in policy gradient - 
not sure if this is helpful yet, but the value can be huge sometimes. This has no effect if DDPM is used + if self.use_ddim and self.eps_clip_value is not None: + noise.clamp_(-self.eps_clip_value, self.eps_clip_value) + + # Get mu + if self.use_ddim: + """ + μ = √ αₜ₋₁ x₀ + √(1-αₜ₋₁ - σₜ²) ε + """ + if deterministic: + etas = torch.zeros((x.shape[0], 1, 1)).to(x.device) + else: + etas = self.eta(cond).unsqueeze(1) # B x 1 x (Da or 1) + sigma = ( + etas + * ((1 - alpha_prev) / (1 - alpha) * (1 - alpha / alpha_prev)) ** 0.5 + ).clamp_(min=1e-10) + dir_xt_coef = (1.0 - alpha_prev - sigma**2).clamp_(min=0).sqrt() + mu = (alpha_prev**0.5) * x_recon + dir_xt_coef * noise + var = sigma**2 + logvar = torch.log(var) + else: + """ + μₜ = β̃ₜ √ α̅ₜ₋₁/(1-α̅ₜ)x₀ + √ αₜ (1-α̅ₜ₋₁)/(1-α̅ₜ)xₜ + """ + mu = ( + extract(self.ddpm_mu_coef1, t, x.shape) * x_recon + + extract(self.ddpm_mu_coef2, t, x.shape) * x + ) + logvar = extract(self.ddpm_logvar_clipped, t, x.shape) + return mu, logvar From f2eddaf47d748af3be42fcc6033d9588ab8f81d7 Mon Sep 17 00:00:00 2001 From: allenzren Date: Fri, 3 Jan 2025 17:22:38 -0500 Subject: [PATCH 2/2] update eval configs --- cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml | 4 +++- cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml | 4 +++- cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml | 4 +++- cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml | 4 +++- cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml | 4 +++- cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml | 4 +++- cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml | 4 +++- cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml | 4 +++- cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml | 4 +++- cfg/gym/eval/kitchen-v0/eval_diffusion_mlp.yaml | 4 +++- cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml | 4 +++- cfg/robomimic/eval/can/eval_diffusion_mlp.yaml | 4 +++- cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml | 4 +++- cfg/robomimic/eval/can/eval_diffusion_unet.yaml | 4 
+++- cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml | 4 +++- cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml | 4 +++- cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml | 4 +++- cfg/robomimic/eval/lift/eval_diffusion_unet.yaml | 4 +++- cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml | 4 +++- cfg/robomimic/eval/square/eval_diffusion_mlp.yaml | 4 +++- cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml | 4 +++- cfg/robomimic/eval/square/eval_diffusion_unet.yaml | 4 +++- cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml | 4 +++- cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml | 5 +++-- cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml | 4 +++- cfg/robomimic/eval/transport/eval_diffusion_unet.yaml | 4 +++- cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml | 4 +++- 27 files changed, 81 insertions(+), 28 deletions(-) diff --git a/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml b/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml index e3d0653..945a8d1 100644 --- a/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml +++ b/cfg/d3il/eval/avoid_m1/eval_diffusion_mlp.yaml @@ -19,6 +19,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 25 render_num: 40 @@ -47,7 +48,8 @@ env: reset_within_step: False model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 # diff --git a/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml b/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml index ea712a7..acca7c9 100644 --- a/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml +++ b/cfg/furniture/eval/lamp_low/eval_diffusion_mlp.yaml @@ -21,6 +21,7 @@ horizon_steps: 8 act_steps: 8 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} render_num: 0 @@ -41,7 +42,8 @@ env: sparse_reward: True model: - 
_target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml b/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml index 6de459a..d001ecf 100644 --- a/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml +++ b/cfg/furniture/eval/lamp_low/eval_diffusion_unet.yaml @@ -21,6 +21,7 @@ horizon_steps: 16 act_steps: 8 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} render_num: 0 @@ -41,7 +42,8 @@ env: sparse_reward: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml b/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml index 3694c70..e5fa3ac 100644 --- a/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml +++ b/cfg/furniture/eval/one_leg_low/eval_diffusion_mlp.yaml @@ -21,6 +21,7 @@ horizon_steps: 8 act_steps: 8 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} render_num: 0 @@ -41,7 +42,8 @@ env: sparse_reward: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml b/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml index c1626d9..ef0046b 100644 --- a/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml +++ b/cfg/furniture/eval/one_leg_low/eval_diffusion_unet.yaml @@ -21,6 +21,7 @@ horizon_steps: 16 
act_steps: 8 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} render_num: 0 @@ -41,7 +42,8 @@ env: sparse_reward: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml b/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml index 9109faf..21c9c61 100644 --- a/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml +++ b/cfg/furniture/eval/round_table_low/eval_diffusion_mlp.yaml @@ -21,6 +21,7 @@ horizon_steps: 8 act_steps: 8 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} render_num: 0 @@ -41,7 +42,8 @@ env: sparse_reward: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml b/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml index daf050e..3dc8e45 100644 --- a/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml +++ b/cfg/furniture/eval/round_table_low/eval_diffusion_unet.yaml @@ -21,6 +21,7 @@ horizon_steps: 16 act_steps: 8 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: ${eval:'round(${env.max_episode_steps} / ${act_steps})'} render_num: 0 @@ -41,7 +42,8 @@ env: sparse_reward: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml 
b/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml index b8e0404..9da5d6e 100644 --- a/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml +++ b/cfg/gym/eval/halfcheetah-v2/eval_diffusion_mlp.yaml @@ -19,6 +19,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. render_num: 0 @@ -40,7 +41,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 # diff --git a/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml b/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml index 4c0fcac..dcc7f9f 100644 --- a/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml +++ b/cfg/gym/eval/hopper-v2/eval_diffusion_mlp.yaml @@ -19,6 +19,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. 
render_num: 0 @@ -40,7 +41,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 # diff --git a/cfg/gym/eval/kitchen-v0/eval_diffusion_mlp.yaml b/cfg/gym/eval/kitchen-v0/eval_diffusion_mlp.yaml index f74e0fc..1f83839 100644 --- a/cfg/gym/eval/kitchen-v0/eval_diffusion_mlp.yaml +++ b/cfg/gym/eval/kitchen-v0/eval_diffusion_mlp.yaml @@ -19,6 +19,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 70 render_num: 0 @@ -40,7 +41,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 # diff --git a/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml b/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml index 1b1d2b0..26eede9 100644 --- a/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml +++ b/cfg/gym/eval/walker2d-v2/eval_diffusion_mlp.yaml @@ -19,6 +19,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 250 # each episode can take maximum (max_episode_steps / act_steps, =250 right now) steps but may finish earlier in gym. We only count episodes finished within n_steps for evaluation. 
render_num: 0 @@ -40,7 +41,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 # diff --git a/cfg/robomimic/eval/can/eval_diffusion_mlp.yaml b/cfg/robomimic/eval/can/eval_diffusion_mlp.yaml index d19359e..1bbb789 100644 --- a/cfg/robomimic/eval/can/eval_diffusion_mlp.yaml +++ b/cfg/robomimic/eval/can/eval_diffusion_mlp.yaml @@ -20,6 +20,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 300 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -44,7 +45,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml index 55db305..ebd8979 100644 --- a/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml +++ b/cfg/robomimic/eval/can/eval_diffusion_mlp_img.yaml @@ -23,6 +23,7 @@ horizon_steps: 4 act_steps: 4 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: 300 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -58,7 +59,8 @@ shape_meta: shape: [7] model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/can/eval_diffusion_unet.yaml b/cfg/robomimic/eval/can/eval_diffusion_unet.yaml index 2d1ac3b..faa59cd 100644 --- a/cfg/robomimic/eval/can/eval_diffusion_unet.yaml +++ b/cfg/robomimic/eval/can/eval_diffusion_unet.yaml @@ -20,6 +20,7 @@ denoising_steps: 20 
cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 75 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -44,7 +45,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml index 3b8f643..4bc3f30 100644 --- a/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml +++ b/cfg/robomimic/eval/can/eval_diffusion_unet_img.yaml @@ -23,6 +23,7 @@ horizon_steps: 4 act_steps: 4 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: 300 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -58,7 +59,8 @@ shape_meta: shape: [7] model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml b/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml index 6a0aa81..6c9804b 100644 --- a/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml +++ b/cfg/robomimic/eval/lift/eval_diffusion_mlp.yaml @@ -20,6 +20,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 300 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -44,7 +45,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml index 
bf8c232..95bae04 100644 --- a/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml +++ b/cfg/robomimic/eval/lift/eval_diffusion_mlp_img.yaml @@ -23,6 +23,7 @@ horizon_steps: 4 act_steps: 4 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: 300 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -58,7 +59,8 @@ shape_meta: shape: [7] model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml b/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml index 800354c..63c729f 100644 --- a/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml +++ b/cfg/robomimic/eval/lift/eval_diffusion_unet.yaml @@ -20,6 +20,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 75 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -44,7 +45,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml index 35c567b..7628e92 100644 --- a/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml +++ b/cfg/robomimic/eval/lift/eval_diffusion_unet_img.yaml @@ -23,6 +23,7 @@ horizon_steps: 4 act_steps: 4 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: 300 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -58,7 +59,8 @@ shape_meta: shape: [7] model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: 
True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml b/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml index 1009edc..3fe3b54 100644 --- a/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml +++ b/cfg/robomimic/eval/square/eval_diffusion_mlp.yaml @@ -20,6 +20,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 400 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -44,7 +45,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml index 624a1b3..f20620a 100644 --- a/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml +++ b/cfg/robomimic/eval/square/eval_diffusion_mlp_img.yaml @@ -23,6 +23,7 @@ horizon_steps: 4 act_steps: 4 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: 400 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -58,7 +59,8 @@ shape_meta: shape: [7] model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/square/eval_diffusion_unet.yaml b/cfg/robomimic/eval/square/eval_diffusion_unet.yaml index 7280703..1e07042 100644 --- a/cfg/robomimic/eval/square/eval_diffusion_unet.yaml +++ b/cfg/robomimic/eval/square/eval_diffusion_unet.yaml @@ -20,6 +20,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 4 act_steps: 4 +ft_denoising_steps: 10 n_steps: 100 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -44,7 +45,8 @@ env: 
reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml index d35d975..db74d97 100644 --- a/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml +++ b/cfg/robomimic/eval/square/eval_diffusion_unet_img.yaml @@ -23,6 +23,7 @@ horizon_steps: 4 act_steps: 4 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: 400 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -58,7 +59,8 @@ shape_meta: shape: [7] model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml b/cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml index 23826aa..cd15d1d 100644 --- a/cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml +++ b/cfg/robomimic/eval/transport/eval_diffusion_mlp.yaml @@ -20,6 +20,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 8 act_steps: 8 +ft_denoising_steps: 10 n_steps: 400 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -46,9 +47,9 @@ env: max_episode_steps: ${env.max_episode_steps} reset_within_step: True - model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml b/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml index 7413246..d585b3c 100644 --- a/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml 
+++ b/cfg/robomimic/eval/transport/eval_diffusion_mlp_img.yaml @@ -23,6 +23,7 @@ horizon_steps: 8 act_steps: 8 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: 200 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -62,7 +63,8 @@ shape_meta: shape: [14] model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml b/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml index e644bfc..4ee0c42 100644 --- a/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml +++ b/cfg/robomimic/eval/transport/eval_diffusion_unet.yaml @@ -20,6 +20,7 @@ denoising_steps: 20 cond_steps: 1 horizon_steps: 16 act_steps: 8 +ft_denoising_steps: 10 n_steps: 100 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -47,7 +48,8 @@ env: reset_within_step: True model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 randn_clip_value: 3 diff --git a/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml b/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml index 81b0046..3c1b1f9 100644 --- a/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml +++ b/cfg/robomimic/eval/transport/eval_diffusion_unet_img.yaml @@ -23,6 +23,7 @@ horizon_steps: 16 act_steps: 8 use_ddim: True ddim_steps: 5 +ft_denoising_steps: 5 n_steps: 400 # each episode takes max_episode_steps / act_steps steps render_num: 0 @@ -62,7 +63,8 @@ shape_meta: shape: [14] model: - _target_: model.diffusion.diffusion.DiffusionModel + _target_: model.diffusion.diffusion_eval_ft.DiffusionEvalFT + ft_denoising_steps: ${ft_denoising_steps} predict_epsilon: True denoised_clip_value: 1.0 
randn_clip_value: 3