Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
68aac8e
add height training
angelayixuanli Sep 6, 2025
90f3271
mini_cheetah with horse mass
angelayixuanli Oct 23, 2025
b4ed45c
scale inertia
angelayixuanli Oct 24, 2025
1582c75
baseline - stable cheetah
angelayixuanli Oct 29, 2025
afb939c
baseline - split body
angelayixuanli Oct 31, 2025
da4739a
Merge branch 'main' of github.com:sheim/QGym into yl/horseRL
angelayixuanli Nov 2, 2025
fb34dd5
mini_cheetah with horse values
angelayixuanli Nov 5, 2025
f9fa9d3
urdf-debugger fixed
angelayixuanli Nov 5, 2025
af0534b
stable horse but can't stand
angelayixuanli Nov 7, 2025
a880139
realistic effort values
angelayixuanli Nov 8, 2025
c456a0c
better results
angelayixuanli Nov 10, 2025
5001e40
manual merge of non-logging part of yl/horse_RL
sheim Nov 11, 2025
03538e5
Merge branch 'main' into horse_tweaks
sheim Nov 11, 2025
12dc7a4
good SWE cleanup
angelayixuanli Nov 12, 2025
e1b3733
WIP: partial urdf changes
sheim Nov 12, 2025
9aae283
reformat mini_cheetah urdf file for readability
sheim Nov 12, 2025
f155bb4
fix left legs
sheim Nov 12, 2025
318d55e
mirror left and right
sheim Nov 12, 2025
b247418
remove inertial offsets along y for symmetry
sheim Nov 12, 2025
5285384
rename urdf for symmetry ease, some minor tweaks
sheim Nov 12, 2025
ed90880
Merge branch 'yl/horseRL' into horse_tweaks
angelayixuanli Nov 13, 2025
aa7e536
merge horse tweaks
angelayixuanli Nov 13, 2025
71985f6
plots separated by leg or joint
angelayixuanli Nov 13, 2025
795cec0
log obs scaling
angelayixuanli Nov 14, 2025
5d65cc9
update scaling
sheim Nov 17, 2025
4414757
split off horse on its own
sheim Nov 17, 2025
f674162
fix play script
sheim Nov 17, 2025
21a6913
Merge pull request #33 from sheim/horse_tweaks
angelayixuanli Nov 18, 2025
c7eb06f
add horse_osc
angelayixuanli Nov 18, 2025
dc21012
error stats logging
angelayixuanli Nov 19, 2025
2d37c69
wandb sweep config
angelayixuanli Nov 19, 2025
c0411ff
cleanup
angelayixuanli Nov 19, 2025
ca10c7e
more cleanup
angelayixuanli Nov 21, 2025
b9401ac
whoops still need randomize_osc_params
angelayixuanli Nov 21, 2025
80f05d0
horse_tweaks branch + tuning
angelayixuanli Dec 8, 2025
91f2e7c
expand joint limits for lay down motion
angelayixuanli Dec 11, 2025
1f547c4
logs_by_joint new limits
angelayixuanli Dec 11, 2025
935b4d0
add tendon constraints
angelayixuanli Jan 30, 2026
70cfb20
attempt at smoothing descent rewards
angelayixuanli Feb 6, 2026
1957e3f
plots and fix height pos and command
angelayixuanli Feb 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ gym/wandb
*.npz
user/wandb_config.json
*trajectories/
*.png
scaling_analysis/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
12 changes: 12 additions & 0 deletions gym/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"MiniCheetah": ".mini_cheetah.mini_cheetah",
"MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref",
"MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc",
"Horse": ".horse.horse",
"HorseOsc": ".horse.horse_osc",
"MIT_Humanoid": ".mit_humanoid.mit_humanoid",
"Anymal": ".anymal_c.anymal",
"A1": ".a1.a1",
Expand All @@ -28,6 +30,8 @@
"MiniCheetahRefCfg": ".mini_cheetah.mini_cheetah_ref_config",
"MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config",
"MiniCheetahSACCfg": ".mini_cheetah.mini_cheetah_SAC_config",
"HorseCfg": ".horse.horse_config",
"HorseOscCfg": ".horse.horse_osc_config",
"MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config",
"A1Cfg": ".a1.a1_config",
"AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config",
Expand All @@ -42,6 +46,8 @@
"MiniCheetahRefRunnerCfg": ".mini_cheetah.mini_cheetah_ref_config",
"MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config",
"MiniCheetahSACRunnerCfg": ".mini_cheetah.mini_cheetah_SAC_config",
"HorseRunnerCfg": ".horse.horse_config",
"HorseOscRunnerCfg": ".horse.horse_osc_config",
"MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config",
"A1RunnerCfg": ".a1.a1_config",
"AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config",
Expand All @@ -68,6 +74,12 @@
"MiniCheetahSACCfg",
"MiniCheetahSACRunnerCfg"
],
"horse": ["Horse", "HorseCfg", "HorseRunnerCfg"],
"horse_osc": [
"HorseOsc",
"HorseOscCfg",
"HorseOscRunnerCfg",
],
"humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"],
"humanoid_running": [
"HumanoidRunning",
Expand Down
28 changes: 27 additions & 1 deletion gym/envs/base/legged_robot.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,8 +517,9 @@ def _init_buffers(self):
dtype=torch.float,
device=self.device,
)
# add height as the 4th command
self.commands = torch.zeros(
self.num_envs, 3, dtype=torch.float, device=self.device
self.num_envs, 4, dtype=torch.float, device=self.device
)
self.base_lin_vel = quat_rotate_inverse(
self.base_quat, self.root_states[:, 7:10]
Expand Down Expand Up @@ -986,6 +987,31 @@ def _sqrdexp(self, x, scale=1.0):
-torch.square(x / scale) / self.cfg.reward_settings.tracking_sigma
)

def _process_rigid_body_props(self, props, env_id):
if env_id == 0:
# * init buffers for the domain rand changes
self.mass = torch.zeros(self.num_envs, 1, device=self.device)
self.com = torch.zeros(self.num_envs, 3, device=self.device)

# * randomize mass
if self.cfg.domain_rand.randomize_base_mass:
lower = self.cfg.domain_rand.lower_mass_offset
upper = self.cfg.domain_rand.upper_mass_offset
# self.mass_
props[0].mass += np.random.uniform(lower, upper)
self.mass[env_id] = props[0].mass
# * randomize com position
lower = self.cfg.domain_rand.lower_z_offset
upper = self.cfg.domain_rand.upper_z_offset
props[0].com.z += np.random.uniform(lower, upper)
self.com[env_id, 2] = props[0].com.z

lower = self.cfg.domain_rand.lower_x_offset
upper = self.cfg.domain_rand.upper_x_offset
props[0].com.x += np.random.uniform(lower, upper)
self.com[env_id, 0] = props[0].com.x
return props

# ------------ reward functions----------------

def _reward_lin_vel_z(self):
Expand Down
78 changes: 78 additions & 0 deletions gym/envs/horse/horse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import torch

from isaacgym.torch_utils import torch_rand_float
from gym.envs.base.legged_robot import LeggedRobot


class Horse(LeggedRobot):
    """Legged-robot environment for the horse model.

    Extends ``LeggedRobot`` with a fourth command channel (base height)
    and a family of squared-exponential shaping rewards.
    """

    def __init__(self, gym, sim, cfg, sim_params, sim_device, headless):
        super().__init__(gym, sim, cfg, sim_params, sim_device, headless)

    def _reward_lin_vel_z(self):
        """Reward small vertical base velocity (squared exponential)."""
        vz_scaled = self.base_lin_vel[:, 2] / self.scales["base_lin_vel"]
        return self._sqrdexp(vz_scaled)

    def _reward_ang_vel_xy(self):
        """Reward small roll/pitch base angular velocity."""
        scaled = self.base_ang_vel[:, :2] / self.scales["base_ang_vel"]
        return self._sqrdexp(scaled).sum(dim=1)

    def _reward_orientation(self):
        """Reward a flat base (projected gravity has no xy component)."""
        tilt = torch.square(self.projected_gravity[:, :2])
        tilt = tilt / self.cfg.reward_settings.tracking_sigma
        return torch.exp(-tilt).sum(dim=1)

    def _reward_min_base_height(self):
        """Saturating height reward: full once base reaches the target.

        Heights above the target are clamped to zero error, so only a
        shortfall below ``base_height_target`` is penalized.
        """
        shortfall = self.base_height - self.cfg.reward_settings.base_height_target
        shortfall = shortfall / self.scales["base_height"]
        shortfall = torch.clamp(shortfall, max=0, min=None).flatten()
        return self._sqrdexp(shortfall)

    def _reward_tracking_lin_vel(self):
        """Track the commanded xy linear velocity."""
        vel_err = self.commands[:, :2] - self.base_lin_vel[:, :2]
        # * scale by (1+|cmd|): if cmd=0, no scaling.
        vel_err = vel_err * (1.0 / (1.0 + torch.abs(self.commands[:, :2])))
        sq_err = torch.sum(torch.square(vel_err), dim=1)
        return torch.exp(-sq_err / self.cfg.reward_settings.tracking_sigma)

    def _reward_tracking_ang_vel(self):
        """Track the commanded yaw rate (command channel 2).

        NOTE(review): the error is squared here and squared again inside
        ``_sqrdexp`` — confirm the double squaring is intended.
        """
        yaw_err = (self.commands[:, 2] - self.base_ang_vel[:, 2]) / 5.0
        return self._sqrdexp(torch.square(yaw_err))

    def _reward_dof_vel(self):
        """Reward small joint velocities, summed over joints."""
        per_joint = self._sqrdexp(self.dof_vel / self.scales["dof_vel"])
        return per_joint.sum(dim=1)

    def _reward_dof_near_home(self):
        """Reward joint positions close to the default pose."""
        deviation = (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos_obs"]
        return self._sqrdexp(deviation).sum(dim=1)

    def _resample_commands(self, env_ids):
        """Resample base commands, then draw a new height command (index 3)."""
        super()._resample_commands(env_ids)

        low, high = self.command_ranges["height"]
        drawn = torch_rand_float(low, high, (len(env_ids), 1), device=self.device)
        self.commands[env_ids, 3] = drawn.squeeze(1)

    def _reward_tracking_height(self):
        """Track the commanded base height (command channel 3)."""
        height_err = self.base_height.flatten() - self.commands[:, 3].flatten()
        height_err = height_err / self.scales["base_height"]
        return self._sqrdexp(height_err)
Loading