From 34f2105277355059d6d7de2d54d492788fd76db2 Mon Sep 17 00:00:00 2001
From: "alessandro.assirelli"
Date: Thu, 17 Apr 2025 18:13:42 +0200
Subject: [PATCH 1/3] add cap on gradient

---
 rsl_rl/algorithms/distillation.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/rsl_rl/algorithms/distillation.py b/rsl_rl/algorithms/distillation.py
index 93d70ef2..c0e99ac0 100644
--- a/rsl_rl/algorithms/distillation.py
+++ b/rsl_rl/algorithms/distillation.py
@@ -25,6 +25,7 @@ def __init__(
         num_learning_epochs=1,
         gradient_length=15,
         learning_rate=1e-3,
+        max_grad_norm=1.,
         loss_type="mse",
         device="cpu",
         # Distributed training parameters
@@ -55,6 +56,8 @@ def __init__(
         self.num_learning_epochs = num_learning_epochs
         self.gradient_length = gradient_length
         self.learning_rate = learning_rate
+        self.max_grad_norm = max_grad_norm
+
 
         # initialize the loss function
         if loss_type == "mse":
@@ -127,6 +130,7 @@ def update(self):
                     loss.backward()
                     if self.is_multi_gpu:
                         self.reduce_parameters()
+                    nn.utils.clip_grad_norm_(self.policy.student.parameters(), self.max_grad_norm)
                     self.optimizer.step()
                     self.policy.detach_hidden_states()
                     loss = 0

From 91fa7a5f10b87c14a1e17514fbc56ff6ea1b584a Mon Sep 17 00:00:00 2001
From: "alessandro.assirelli"
Date: Fri, 9 May 2025 15:49:47 +0200
Subject: [PATCH 2/3] max_grad_norm not used as default

---
 rsl_rl/algorithms/distillation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rsl_rl/algorithms/distillation.py b/rsl_rl/algorithms/distillation.py
index c0e99ac0..e926c243 100644
--- a/rsl_rl/algorithms/distillation.py
+++ b/rsl_rl/algorithms/distillation.py
@@ -25,7 +25,7 @@ def __init__(
         num_learning_epochs=1,
         gradient_length=15,
         learning_rate=1e-3,
-        max_grad_norm=1.,
+        max_grad_norm=None,
         loss_type="mse",
         device="cpu",
         # Distributed training parameters
@@ -58,7 +58,6 @@ def __init__(
         self.learning_rate = learning_rate
         self.max_grad_norm = max_grad_norm
 
-
         # initialize the loss function
         if loss_type == "mse":
             self.loss_fn = nn.functional.mse_loss
@@ -130,7 +129,8 @@ def update(self):
                     loss.backward()
                     if self.is_multi_gpu:
                         self.reduce_parameters()
-                    nn.utils.clip_grad_norm_(self.policy.student.parameters(), self.max_grad_norm)
+                    if self.max_grad_norm:
+                        nn.utils.clip_grad_norm_(self.policy.student.parameters(), self.max_grad_norm)
                     self.optimizer.step()
                     self.policy.detach_hidden_states()
                     loss = 0

From e4a99555f1bf679fe31b0ee7b321f238de5e8569 Mon Sep 17 00:00:00 2001
From: "alessandro.assirelli"
Date: Fri, 9 May 2025 15:50:30 +0200
Subject: [PATCH 3/3] run formatter

---
 rsl_rl/runners/on_policy_runner.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/rsl_rl/runners/on_policy_runner.py b/rsl_rl/runners/on_policy_runner.py
index ecda87ae..d396d5b5 100644
--- a/rsl_rl/runners/on_policy_runner.py
+++ b/rsl_rl/runners/on_policy_runner.py
@@ -93,7 +93,9 @@ def __init__(self, env: VecEnv, train_cfg: dict, log_dir: str | None = None, dev
 
         # initialize algorithm
         alg_class = eval(self.alg_cfg.pop("class_name"))
-        self.alg: PPO | Distillation = alg_class(policy, device=self.device, **self.alg_cfg, multi_gpu_cfg=self.multi_gpu_cfg)
+        self.alg: PPO | Distillation = alg_class(
+            policy, device=self.device, **self.alg_cfg, multi_gpu_cfg=self.multi_gpu_cfg
+        )
 
         # store training configuration
         self.num_steps_per_env = self.cfg["num_steps_per_env"]
@@ -387,8 +389,13 @@ def log(self, locs: dict, width: int = 80, pad: int = 35):
             f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n"""
             f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n"""
             f"""{'Time elapsed:':>{pad}} {time.strftime("%H:%M:%S", time.gmtime(self.tot_time))}\n"""
-            f"""{'ETA:':>{pad}} {time.strftime("%H:%M:%S", time.gmtime(self.tot_time / (locs['it'] - locs['start_iter'] + 1) * (
-                locs['start_iter'] + locs['num_learning_iterations'] - locs['it'])))}\n"""
+            f"""{'ETA:':>{pad}} {time.strftime(
+                "%H:%M:%S",
+                time.gmtime(
+                    self.tot_time / (locs['it'] - locs['start_iter'] + 1)
+                    * (locs['start_iter'] + locs['num_learning_iterations'] - locs['it'])
+                )
+            )}\n"""
         )
 
         print(log_string)
@@ -513,16 +520,20 @@ def _configure_multi_gpu(self):
 
         # check if user has device specified for local rank
         if self.device != f"cuda:{self.gpu_local_rank}":
-            raise ValueError(f"Device '{self.device}' does not match expected device for local rank '{self.gpu_local_rank}'.")
+            raise ValueError(
+                f"Device '{self.device}' does not match expected device for local rank '{self.gpu_local_rank}'."
+            )
         # validate multi-gpu configuration
         if self.gpu_local_rank >= self.gpu_world_size:
-            raise ValueError(f"Local rank '{self.gpu_local_rank}' is greater than or equal to world size '{self.gpu_world_size}'.")
+            raise ValueError(
+                f"Local rank '{self.gpu_local_rank}' is greater than or equal to world size '{self.gpu_world_size}'."
+            )
         if self.gpu_global_rank >= self.gpu_world_size:
-            raise ValueError(f"Global rank '{self.gpu_global_rank}' is greater than or equal to world size '{self.gpu_world_size}'.")
+            raise ValueError(
+                f"Global rank '{self.gpu_global_rank}' is greater than or equal to world size '{self.gpu_world_size}'."
+            )
 
         # initialize torch distributed
-        torch.distributed.init_process_group(
-            backend="nccl", rank=self.gpu_global_rank, world_size=self.gpu_world_size
-        )
+        torch.distributed.init_process_group(backend="nccl", rank=self.gpu_global_rank, world_size=self.gpu_world_size)
         # set device to the local rank
         torch.cuda.set_device(self.gpu_local_rank)
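
Taken together, patches 1 and 2 add an optional cap on the gradient norm to the distillation update: clipping runs between the backward pass and the optimizer step, and the `max_grad_norm=None` default leaves the previous behaviour unchanged. The snippet below is a minimal sketch of that pattern in isolation, not code from rsl_rl: `model`, `optimizer`, `inputs`, and `targets` are placeholder names, and it uses an explicit `is not None` check where the patch relies on the truthiness of `max_grad_norm`.

import torch
import torch.nn as nn


def training_step(model, optimizer, inputs, targets, max_grad_norm=None):
    """One supervised update with an optional cap on the gradient norm."""
    optimizer.zero_grad()
    loss = nn.functional.mse_loss(model(inputs), targets)
    loss.backward()
    # Clip only when a cap is configured; with max_grad_norm=None the update
    # is identical to the un-clipped step.
    if max_grad_norm is not None:
        nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    optimizer.step()
    return loss.item()


# Example usage with a toy model and a gradient-norm cap of 1.0.
model = nn.Linear(8, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
inputs, targets = torch.randn(32, 8), torch.randn(32, 2)
training_step(model, optimizer, inputs, targets, max_grad_norm=1.0)

Defaulting the cap to `None` means existing training configurations keep their behaviour unless clipping is explicitly requested.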