From 34f2105277355059d6d7de2d54d492788fd76db2 Mon Sep 17 00:00:00 2001
From: "alessandro.assirelli"
Date: Thu, 17 Apr 2025 18:13:42 +0200
Subject: [PATCH 1/3] add cap on gradient

---
 rsl_rl/algorithms/distillation.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/rsl_rl/algorithms/distillation.py b/rsl_rl/algorithms/distillation.py
index 93d70ef2..c0e99ac0 100644
--- a/rsl_rl/algorithms/distillation.py
+++ b/rsl_rl/algorithms/distillation.py
@@ -25,6 +25,7 @@ def __init__(
         num_learning_epochs=1,
         gradient_length=15,
         learning_rate=1e-3,
+        max_grad_norm=1.,
         loss_type="mse",
         device="cpu",
         # Distributed training parameters
@@ -55,6 +56,8 @@ def __init__(
         self.num_learning_epochs = num_learning_epochs
         self.gradient_length = gradient_length
         self.learning_rate = learning_rate
+        self.max_grad_norm = max_grad_norm
+
 
         # initialize the loss function
         if loss_type == "mse":
@@ -127,6 +130,7 @@ def update(self):
                     loss.backward()
                     if self.is_multi_gpu:
                         self.reduce_parameters()
+                    nn.utils.clip_grad_norm_(self.policy.student.parameters(), self.max_grad_norm)
                     self.optimizer.step()
                     self.policy.detach_hidden_states()
                     loss = 0

From 91fa7a5f10b87c14a1e17514fbc56ff6ea1b584a Mon Sep 17 00:00:00 2001
From: "alessandro.assirelli"
Date: Fri, 9 May 2025 15:49:47 +0200
Subject: [PATCH 2/3] max_grad_norm not used as default

---
 rsl_rl/algorithms/distillation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rsl_rl/algorithms/distillation.py b/rsl_rl/algorithms/distillation.py
index c0e99ac0..e926c243 100644
--- a/rsl_rl/algorithms/distillation.py
+++ b/rsl_rl/algorithms/distillation.py
@@ -25,7 +25,7 @@ def __init__(
         num_learning_epochs=1,
         gradient_length=15,
         learning_rate=1e-3,
-        max_grad_norm=1.,
+        max_grad_norm=None,
         loss_type="mse",
         device="cpu",
         # Distributed training parameters
@@ -58,7 +58,6 @@ def __init__(
         self.learning_rate = learning_rate
         self.max_grad_norm = max_grad_norm
 
-
         # initialize the loss function
         if loss_type == "mse":
             self.loss_fn = nn.functional.mse_loss
@@ -130,7 +129,8 @@ def update(self):
                     loss.backward()
                     if self.is_multi_gpu:
                         self.reduce_parameters()
-                    nn.utils.clip_grad_norm_(self.policy.student.parameters(), self.max_grad_norm)
+                    if self.max_grad_norm:
+                        nn.utils.clip_grad_norm_(self.policy.student.parameters(), self.max_grad_norm)
                     self.optimizer.step()
                     self.policy.detach_hidden_states()
                     loss = 0

From e4a99555f1bf679fe31b0ee7b321f238de5e8569 Mon Sep 17 00:00:00 2001
From: "alessandro.assirelli"
Date: Fri, 9 May 2025 15:50:30 +0200
Subject: [PATCH 3/3] run formatter

---
 rsl_rl/runners/on_policy_runner.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/rsl_rl/runners/on_policy_runner.py b/rsl_rl/runners/on_policy_runner.py
index ecda87ae..d396d5b5 100644
--- a/rsl_rl/runners/on_policy_runner.py
+++ b/rsl_rl/runners/on_policy_runner.py
@@ -93,7 +93,9 @@ def __init__(self, env: VecEnv, train_cfg: dict, log_dir: str | None = None, dev
 
         # initialize algorithm
         alg_class = eval(self.alg_cfg.pop("class_name"))
-        self.alg: PPO | Distillation = alg_class(policy, device=self.device, **self.alg_cfg, multi_gpu_cfg=self.multi_gpu_cfg)
+        self.alg: PPO | Distillation = alg_class(
+            policy, device=self.device, **self.alg_cfg, multi_gpu_cfg=self.multi_gpu_cfg
+        )
 
         # store training configuration
         self.num_steps_per_env = self.cfg["num_steps_per_env"]
@@ -387,8 +389,13 @@ def log(self, locs: dict, width: int = 80, pad: int = 35):
             f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n"""
             f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n"""
             f"""{'Time elapsed:':>{pad}} {time.strftime("%H:%M:%S", time.gmtime(self.tot_time))}\n"""
-            f"""{'ETA:':>{pad}} {time.strftime("%H:%M:%S", time.gmtime(self.tot_time / (locs['it'] - locs['start_iter'] + 1) * (
-                locs['start_iter'] + locs['num_learning_iterations'] - locs['it'])))}\n"""
+            f"""{'ETA:':>{pad}} {time.strftime(
+                "%H:%M:%S",
+                time.gmtime(
+                    self.tot_time / (locs['it'] - locs['start_iter'] + 1)
+                    * (locs['start_iter'] + locs['num_learning_iterations'] - locs['it'])
+                )
+            )}\n"""
         )
 
         print(log_string)
@@ -513,16 +520,20 @@ def _configure_multi_gpu(self):
 
         # check if user has device specified for local rank
         if self.device != f"cuda:{self.gpu_local_rank}":
-            raise ValueError(f"Device '{self.device}' does not match expected device for local rank '{self.gpu_local_rank}'.")
+            raise ValueError(
+                f"Device '{self.device}' does not match expected device for local rank '{self.gpu_local_rank}'."
+            )
         # validate multi-gpu configuration
         if self.gpu_local_rank >= self.gpu_world_size:
-            raise ValueError(f"Local rank '{self.gpu_local_rank}' is greater than or equal to world size '{self.gpu_world_size}'.")
+            raise ValueError(
+                f"Local rank '{self.gpu_local_rank}' is greater than or equal to world size '{self.gpu_world_size}'."
+            )
         if self.gpu_global_rank >= self.gpu_world_size:
-            raise ValueError(f"Global rank '{self.gpu_global_rank}' is greater than or equal to world size '{self.gpu_world_size}'.")
+            raise ValueError(
+                f"Global rank '{self.gpu_global_rank}' is greater than or equal to world size '{self.gpu_world_size}'."
+            )
 
         # initialize torch distributed
-        torch.distributed.init_process_group(
-            backend="nccl", rank=self.gpu_global_rank, world_size=self.gpu_world_size
-        )
+        torch.distributed.init_process_group(backend="nccl", rank=self.gpu_global_rank, world_size=self.gpu_world_size)
         # set device to the local rank
         torch.cuda.set_device(self.gpu_local_rank)
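
Taken together, patches 1 and 2 add an optional cap on the gradient norm to the distillation update: clipping runs between the backward pass and the optimizer step, and the `max_grad_norm=None` default leaves the previous behaviour unchanged. The snippet below is a minimal sketch of that pattern in isolation, not code from rsl_rl: `model`, `optimizer`, `inputs`, and `targets` are placeholder names, and it uses an explicit `is not None` check where the patch relies on the truthiness of `max_grad_norm`.

import torch
import torch.nn as nn


def training_step(model, optimizer, inputs, targets, max_grad_norm=None):
    """One supervised update with an optional cap on the gradient norm."""
    optimizer.zero_grad()
    loss = nn.functional.mse_loss(model(inputs), targets)
    loss.backward()
    # Clip only when a cap is configured; with max_grad_norm=None the update
    # is identical to the un-clipped step.
    if max_grad_norm is not None:
        nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    optimizer.step()
    return loss.item()


# Example usage with a toy model and a gradient-norm cap of 1.0.
model = nn.Linear(8, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
inputs, targets = torch.randn(32, 8), torch.randn(32, 2)
training_step(model, optimizer, inputs, targets, max_grad_norm=1.0)

Defaulting the cap to `None` means existing training configurations keep their behaviour unless clipping is explicitly requested.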