Renames observation types to observation sets

Mayankm96 · ClemensSchwarke · commit 3c12abb25eff · 2025-07-18T11:30:12.000+02:00
Approved-by: Clemens Schwarke
diff --git a/rsl_rl/env/vec_env.py b/rsl_rl/env/vec_env.py
@@ -67,28 +67,32 @@ def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.T
                 dones (torch.Tensor): Done flags from the environment. Shape: (num_envs,)
                 extras (dict): Extra information from the environment.
 
-        Notes:
-            Observations:
-            The observations TensorDict usually contains multiple observation groups. The `obs_groups` dictionary of the
-            runner configuration specifies which observation groups are used for which purpose, i.e., it maps the
-            available observation groups to observation types. The observation types (keys of the `obs_groups`
-            dictionary) currently used by rsl_rl are:
-                - "policy": Specified observation groups are used as input to the policy/actor network.
-                - "critic": Specified observation groups are used as input to the critic network.
-                - "teacher": Specified observation groups are used as input to the teacher network.
-                - "rnd_state": Specified observation groups are used as input to the RND network.
-            The "policy" observation type is always required. The other observation types are optional and if not
-            provided, the observation groups specified for the "policy" observation type are used.
-
-            Extras:
+        Observations:
+
+            The observations TensorDict usually contains multiple observation groups. The `obs_groups`
+            dictionary of the runner configuration specifies which observation groups are used for which
+            purpose, i.e., it maps the available observation groups to observation sets. The observation sets
+            (keys of the `obs_groups` dictionary) currently used by rsl_rl are:
+
+            - "policy": Specified observation groups are used as input to the actor/student network.
+            - "critic": Specified observation groups are used as input to the critic network.
+            - "teacher": Specified observation groups are used as input to the teacher network.
+            - "rnd_state": Specified observation groups are used as input to the RND network.
+
+            Incomplete or incorrect configurations are handled in the `resolve_obs_groups()` function in
+            `rsl_rl/utils/utils.py`.
+
+        Extras:
+
             The extras dictionary includes metrics such as the episode reward, episode length, etc. The following
             dictionary keys are used by rsl_rl:
-                - "time_outs" (torch.Tensor): Timeouts for the environments. These correspond to terminations that
-                    happen due to time limits and not due to the environment reaching a terminal state. This is useful
-                    for environments that have a fixed episode length.
 
-                - "log" (dict[str, float | torch.Tensor]): Additional information for logging and debugging purposes.
-                    The key should be a string and start with "/" for namespacing. The value can be a scalar or a
-                    tensor. If it is a tensor, the mean of the tensor is used for logging.
+            - "time_outs" (torch.Tensor): Timeouts for the environments. These correspond to terminations that
+               happen due to time limits and not due to the environment reaching a terminal state. This is useful
+               for environments that have a fixed episode length.
+
+            - "log" (dict[str, float | torch.Tensor]): Additional information for logging and debugging purposes.
+               The key should be a string and start with "/" for namespacing. The value can be a scalar or a
+               tensor. If it is a tensor, the mean of the tensor is used for logging.
         """
         raise NotImplementedError
diff --git a/rsl_rl/runners/distillation_runner.py b/rsl_rl/runners/distillation_runner.py
@@ -15,7 +15,7 @@
 from rsl_rl.env import VecEnv
 from rsl_rl.modules import StudentTeacher, StudentTeacherRecurrent
 from rsl_rl.runners import OnPolicyRunner
-from rsl_rl.utils import resolve_obs_types, store_code_state
+from rsl_rl.utils import resolve_obs_groups, store_code_state
 
 
 class DistillationRunner(OnPolicyRunner):
@@ -37,7 +37,7 @@ def __init__(self, env: VecEnv, train_cfg: dict, log_dir: str | None = None, dev
 
         # query observations from environment for algorithm construction
         obs = self.env.get_observations()
-        self.cfg["obs_groups"] = resolve_obs_types(obs, self.cfg["obs_groups"], default_types=["teacher"])
+        self.cfg["obs_groups"] = resolve_obs_groups(obs, self.cfg["obs_groups"], default_sets=["teacher"])
 
         # create the algorithm
         self.alg = self._construct_algorithm(obs)
diff --git a/rsl_rl/runners/on_policy_runner.py b/rsl_rl/runners/on_policy_runner.py
@@ -16,7 +16,7 @@
 from rsl_rl.algorithms import PPO
 from rsl_rl.env import VecEnv
 from rsl_rl.modules import ActorCritic, ActorCriticRecurrent, resolve_rnd_config, resolve_symmetry_config
-from rsl_rl.utils import resolve_obs_types, store_code_state
+from rsl_rl.utils import resolve_obs_groups, store_code_state
 
 
 class OnPolicyRunner:
@@ -38,10 +38,10 @@ def __init__(self, env: VecEnv, train_cfg: dict, log_dir: str | None = None, dev
 
         # query observations from environment for algorithm construction
         obs = self.env.get_observations()
-        default_types = ["critic"]
+        default_sets = ["critic"]
         if "rnd_cfg" in self.alg_cfg and self.alg_cfg["rnd_cfg"] is not None:
-            default_types.append("rnd_state")
-        self.cfg["obs_groups"] = resolve_obs_types(obs, self.cfg["obs_groups"], default_types)
+            default_sets.append("rnd_state")
+        self.cfg["obs_groups"] = resolve_obs_groups(obs, self.cfg["obs_groups"], default_sets)
 
         # create the algorithm
         self.alg = self._construct_algorithm(obs)
diff --git a/rsl_rl/utils/utils.py b/rsl_rl/utils/utils.py
@@ -10,6 +10,7 @@
 import os
 import pathlib
 import torch
+import warnings
 from tensordict import TensorDict
 from typing import Callable
 
@@ -198,81 +199,106 @@ def string_to_callable(name: str) -> Callable:
         raise ValueError(msg)
 
 
-def resolve_obs_types(
-    obs: TensorDict, obs_groups: dict[str, list[str]], default_types: list[str]
+def resolve_obs_groups(
+    obs: TensorDict, obs_groups: dict[str, list[str]], default_sets: list[str]
 ) -> dict[str, list[str]]:
-    """Validates the observation types configuration and defaults missing observation types.
+    """Validates the observation configuration and defaults missing observation sets.
 
     The input is an observation dictionary `obs` containing observation groups and a configuration dictionary
-    `obs_groups` where the keys are the observation types and the values are lists of observation groups. A detailed
-    description is found in `rsl_rl/env/vec_env.py`.
+    `obs_groups` where the keys are the observation sets and the values are lists of observation groups.
 
     The configuration dictionary could for example look like:
         {
             "policy": ["group_1", "group_2"],
             "critic": ["group_1", "group_3"]
         }
 
-    This means that the 'policy' type will contain the observations "group_1" and "group_2" and the 'critic' type will
-    contain the observations "group_1" and "group_3". The function will check that all the observations in the 'policy'
-    and 'critic' groups are present in the observation dictionary from the environment.
+    This means that the 'policy' observation set will contain the observations "group_1" and "group_2" and the
+    'critic' observation set will contain the observations "group_1" and "group_3". This function will check that all
+    the observations in the 'policy' and 'critic' observation sets are present in the observation dictionary from the
+    environment.
 
-    Additionally, if one of the `default_types`, e.g. "critic", is not present in the configuration dictionary,
+    Additionally, if one of the `default_sets`, e.g. "critic", is not present in the configuration dictionary,
     this function will:
-        1. Check if a group with the same name exists in the observations and assign this group to the observation type.
-        2. If not, it will assign the observations from the 'policy' type to the observation type.
+
+    1. Check if a group with the same name exists in the observations and assign this group to the observation set.
+    2. If 1. fails, it will assign the observations from the 'policy' observation set to the default observation set.
 
     Args:
         obs: Observations from the environment in the form of a dictionary.
-        obs_groups: Observation types configuration.
-        default_types: Reserved type names used by the algorithm (besides 'policy').
+        obs_groups: Observation sets configuration.
+        default_sets: Reserved observation set names used by the algorithm (besides 'policy').
             If not provided in 'obs_groups', a default behavior gets triggered.
 
     Returns:
         The resolved observation groups.
 
     Raises:
-        ValueError: If the "policy" observation type is not present in the provided observation groups configuration.
-        ValueError: If any observation type is an empty list.
-        ValueError: If any observation type contains an observation term that is not present in the observations.
+        ValueError: If any observation set is an empty list.
+        ValueError: If any observation set contains an observation term that is not present in the observations.
     """
-    # check if policy observation type exists
+    # check if policy observation set exists
     if "policy" not in obs_groups.keys():
-        raise ValueError(
-            "The observation type configuration dictionary must contain the 'policy' key."
-            f" Found keys: {list(obs_groups.keys())}"
-        )
+        if "policy" in obs:
+            obs_groups["policy"] = ["policy"]
+            warnings.warn(
+                "The observation configuration dictionary 'obs_groups' must contain the 'policy' key."
+                " As an observation group with the name 'policy' was found, this is assumed to be the observation set."
+                " Consider adding the 'policy' key to the 'obs_groups' dictionary for clarity."
+                " This behavior will be removed in a future version."
+            )
+        else:
+            raise ValueError(
+                "The observation configuration dictionary 'obs_groups' must contain the 'policy' key."
+                f" Found keys: {list(obs_groups.keys())}"
+            )
 
-    # check all observation types for valid observation groups
-    for type, groups in obs_groups.items():
+    # check all observation sets for valid observation groups
+    for set_name, groups in obs_groups.items():
         # check if the list is empty
         if len(groups) == 0:
-            msg = f"The '{type}' key in the 'obs_groups' dictionary can not be an empty list."
-            if type in default_types:
-                if type not in obs:
-                    msg += " Consider removing the key to default to the observations used for the 'policy' type."
+            msg = f"The '{set_name}' key in the 'obs_groups' dictionary can not be an empty list."
+            if set_name in default_sets:
+                if set_name not in obs:
+                    msg += " Consider removing the key to default to the observations used for the 'policy' set."
                 else:
-                    msg += f" Consider removing the key to default to the observation '{type}' from the environment."
+                    msg += (
+                        f" Consider removing the key to default to the observation '{set_name}' from the environment."
+                    )
             raise ValueError(msg)
         # check groups exist inside the observations from the environment
         for group in groups:
             if group not in obs:
                 raise ValueError(
-                    f"Observation '{group}' in observation type '{type}' not found in the observations from the"
+                    f"Observation '{group}' in observation set '{set_name}' not found in the observations from the"
                     f" environment. Available observations from the environment: {list(obs.keys())}"
                 )
 
-    # fill missing observation types
-    for default_type in default_types:
-        if default_type not in obs_groups.keys():
-            if default_type in obs:
-                obs_groups[default_type] = [default_type]
+    # fill missing observation sets
+    for default_set_name in default_sets:
+        if default_set_name not in obs_groups.keys():
+            if default_set_name in obs:
+                obs_groups[default_set_name] = [default_set_name]
+                warnings.warn(
+                    f"The observation configuration dictionary 'obs_groups' must contain the '{default_set_name}' key."
+                    f" As an observation group with the name '{default_set_name}' was found, this is assumed to be the"
+                    f" observation set. Consider adding the '{default_set_name}' key to the 'obs_groups' dictionary for"
+                    " clarity. This behavior will be removed in a future version."
+                )
             else:
-                obs_groups[default_type] = obs_groups["policy"].copy()
+                obs_groups[default_set_name] = obs_groups["policy"].copy()
+                warnings.warn(
+                    f"The observation configuration dictionary 'obs_groups' must contain the '{default_set_name}' key."
+                    f" As the configuration for '{default_set_name}' is missing, the observations from the 'policy' set"
+                    f" are used. Consider adding the '{default_set_name}' key to the 'obs_groups' dictionary for"
+                    " clarity. This behavior will be removed in a future version."
+                )
 
-    # print the final parsed observation types
-    print("Resolved observation types: ")
-    for type, groups in obs_groups.items():
-        print("\t", type, ": ", groups)
+    # print the final parsed observation sets
+    print("-" * 80)
+    print("Resolved observation sets: ")
+    for set_name, groups in obs_groups.items():
+        print("\t", set_name, ": ", groups)
+    print("-" * 80)
 
     return obs_groups