diff --git a/.gitignore b/.gitignore index c78e6696..2a17069a 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,8 @@ gym/wandb *.npz user/wandb_config.json *trajectories/ +*.png +scaling_analysis/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index f9d8b788..25b20da1 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -15,6 +15,8 @@ "MiniCheetah": ".mini_cheetah.mini_cheetah", "MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref", "MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc", + "Horse": ".horse.horse", + "HorseOsc": ".horse.horse_osc", "MIT_Humanoid": ".mit_humanoid.mit_humanoid", "Anymal": ".anymal_c.anymal", "A1": ".a1.a1", @@ -28,6 +30,8 @@ "MiniCheetahRefCfg": ".mini_cheetah.mini_cheetah_ref_config", "MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACCfg": ".mini_cheetah.mini_cheetah_SAC_config", + "HorseCfg": ".horse.horse_config", + "HorseOscCfg": ".horse.horse_osc_config", "MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config", "A1Cfg": ".a1.a1_config", "AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -42,6 +46,8 @@ "MiniCheetahRefRunnerCfg": ".mini_cheetah.mini_cheetah_ref_config", "MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACRunnerCfg": ".mini_cheetah.mini_cheetah_SAC_config", + "HorseRunnerCfg": ".horse.horse_config", + "HorseOscRunnerCfg": ".horse.horse_osc_config", "MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config", "A1RunnerCfg": ".a1.a1_config", "AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -68,6 +74,12 @@ "MiniCheetahSACCfg", "MiniCheetahSACRunnerCfg" ], + "horse": ["Horse", "HorseCfg", "HorseRunnerCfg"], + "horse_osc": [ + "HorseOsc", + "HorseOscCfg", + "HorseOscRunnerCfg", + ], "humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"], "humanoid_running": [ "HumanoidRunning", diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index 404c62e4..2344e05f 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -517,8 +517,9 @@ def _init_buffers(self): dtype=torch.float, device=self.device, ) + # add height as the 4th command self.commands = torch.zeros( - self.num_envs, 3, dtype=torch.float, device=self.device + self.num_envs, 4, dtype=torch.float, device=self.device ) self.base_lin_vel = quat_rotate_inverse( self.base_quat, self.root_states[:, 7:10] @@ -986,6 +987,31 @@ def _sqrdexp(self, x, scale=1.0): -torch.square(x / scale) / self.cfg.reward_settings.tracking_sigma ) + def _process_rigid_body_props(self, props, env_id): + if env_id == 0: + # * init buffers for the domain rand changes + self.mass = torch.zeros(self.num_envs, 1, device=self.device) + self.com = torch.zeros(self.num_envs, 3, device=self.device) + + # * randomize mass + if self.cfg.domain_rand.randomize_base_mass: + lower = self.cfg.domain_rand.lower_mass_offset + upper = self.cfg.domain_rand.upper_mass_offset + # self.mass_ + props[0].mass += np.random.uniform(lower, upper) + self.mass[env_id] = props[0].mass + # * randomize com position + lower = self.cfg.domain_rand.lower_z_offset + upper = self.cfg.domain_rand.upper_z_offset + props[0].com.z += np.random.uniform(lower, upper) + self.com[env_id, 2] = props[0].com.z + + lower = self.cfg.domain_rand.lower_x_offset + upper = self.cfg.domain_rand.upper_x_offset + props[0].com.x += np.random.uniform(lower, upper) + self.com[env_id, 0] = props[0].com.x + return props + # ------------ reward functions---------------- 
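Note on the self.commands change above: throughout this diff the four command columns are [lin_vel_x, lin_vel_y, yaw_vel, height]. Columns 0-1 feed _reward_tracking_lin_vel, column 2 feeds _reward_tracking_ang_vel, and the new column 3 carries the commanded base height used by _reward_tracking_height and by the Up/Down keys in KeyboardInterface. A minimal standalone sketch of that layout (illustrative only; these tensor names are not part of the codebase):

import torch

num_envs = 4
# command layout assumed by this diff: [lin_vel_x, lin_vel_y, yaw_vel, height]
# (matches scaling.commands = [3, 1, 3, 1] in the horse configs)
commands = torch.zeros(num_envs, 4)
commands[:, 3] = 1.3  # commanded base height [m], sampled from cfg.commands.ranges.height

base_height = torch.full((num_envs,), 1.25)  # measured base height per env
height_error = base_height - commands[:, 3]  # normalized by scales["base_height"] and squashed with _sqrdexp in _reward_tracking_height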
def _reward_lin_vel_z(self): diff --git a/gym/envs/horse/horse.py b/gym/envs/horse/horse.py new file mode 100644 index 00000000..bb2e545e --- /dev/null +++ b/gym/envs/horse/horse.py @@ -0,0 +1,78 @@ +import torch + +from isaacgym.torch_utils import torch_rand_float +from gym.envs.base.legged_robot import LeggedRobot + + +class Horse(LeggedRobot): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _reward_lin_vel_z(self): + """Penalize z axis base linear velocity with squared exp""" + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"]) + + def _reward_ang_vel_xy(self): + """Penalize xy axes base angular velocity""" + error = self._sqrdexp(self.base_ang_vel[:, :2] / self.scales["base_ang_vel"]) + return torch.sum(error, dim=1) + + def _reward_orientation(self): + """Penalize non-flat base orientation""" + error = ( + torch.square(self.projected_gravity[:, :2]) + / self.cfg.reward_settings.tracking_sigma + ) + return torch.sum(torch.exp(-error), dim=1) + + def _reward_min_base_height(self): + """Squared exponential saturating at base_height target""" + error = self.base_height - self.cfg.reward_settings.base_height_target + error /= self.scales["base_height"] + error = torch.clamp(error, max=0, min=None).flatten() + return self._sqrdexp(error) + + def _reward_tracking_lin_vel(self): + """Tracking of linear velocity commands (xy axes)""" + # just use lin_vel? + error = self.commands[:, :2] - self.base_lin_vel[:, :2] + # * scale by (1+|cmd|): if cmd=0, no scaling. + error *= 1.0 / (1.0 + torch.abs(self.commands[:, :2])) + error = torch.sum(torch.square(error), dim=1) + return torch.exp(-error / self.cfg.reward_settings.tracking_sigma) + + def _reward_tracking_ang_vel(self): + """Tracking of angular velocity commands (yaw)""" + ang_vel_error = torch.square( + (self.commands[:, 2] - self.base_ang_vel[:, 2]) / 5.0 + ) + return self._sqrdexp(ang_vel_error) + + def _reward_dof_vel(self): + """Penalize dof velocities""" + return torch.sum(self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1) + + def _reward_dof_near_home(self): + return torch.sum( + self._sqrdexp( + (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos_obs"] + ), + dim=1, + ) + + def _resample_commands(self, env_ids): + super()._resample_commands(env_ids) + + # resample height + height_range = self.command_ranges["height"] + self.commands[env_ids, 3] = torch_rand_float( + height_range[0], height_range[1], (len(env_ids), 1), device=self.device + ).squeeze(1) + + def _reward_tracking_height(self): + """Reward for base height.""" + # error between current and commanded height + error = self.base_height.flatten() - self.commands[:, 3].flatten() + error /= self.scales["base_height"] + + return self._sqrdexp(error) diff --git a/gym/envs/horse/horse_config.py b/gym/envs/horse/horse_config.py new file mode 100644 index 00000000..699aff5f --- /dev/null +++ b/gym/envs/horse/horse_config.py @@ -0,0 +1,262 @@ +from gym.envs.base.legged_robot_config import ( + LeggedRobotCfg, + LeggedRobotRunnerCfg, +) + +BASE_HEIGHT_REF = 1.3 + + +class HorseCfg(LeggedRobotCfg): + class env(LeggedRobotCfg.env): + num_envs = 2**12 + num_actuators = 1 + 4 * 5 + episode_length_s = 10 + + class terrain(LeggedRobotCfg.terrain): + mesh_type = "plane" + + class init_state(LeggedRobotCfg.init_state): + default_joint_angles = { + "haa": 0.0, + "hfe": 0.0, + "kfe": 0.0, + "pfe": 0.0, + "pastern_to_foot": 0.0, + "base_joint": 0.0, + } + + # 
* reset setup chooses how the initial conditions are chosen. + # * "reset_to_basic" = a single position + # * "reset_to_range" = uniformly random from a range defined below + reset_mode = "reset_to_range" + + # * default COM for basic initialization + pos = [0.0, 0.0, 1.4] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + + # * initialization for random range setup + # these are the physical limits in the URDF as of 17 Nov 2025 + # dof_pos_range = { + # "haa": [-0.2, 0.2], + # "hfe": [-0.7, 0.6], + # "kfe": [-1.3, 0.1], + # "pfe": [-0.3, 2.2], + # "pastern_to_foot": [-0.3, 1.8], + # "base_joint": [-0.2, 0.2], + # } + dof_pos_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.7, 0.6], + "kfe": [-1.3, 0.1], + "pfe": [-0.3, 2.2], + "pastern_to_foot": [-0.3, 1.8], + "base_joint": [-0.2, 0.2], + } + dof_vel_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.2, 0.2], + "kfe": [-0.2, 0.2], + "pfe": [-0.2, 0.2], + "pastern_to_foot": [-0.2, 0.2], + "base_joint": [-0.2, 0.2], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [1.3, 1.3], # z + [-0.2, 0.2], # roll + [-0.2, 0.2], # pitch + [-0.2, 0.2], # yaw + ] + root_vel_range = [ + [-0.5, 5.0], # x + [0.0, 0.0], # y + [-0.05, 0.05], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(LeggedRobotCfg.control): + # * PD Drive parameters: + stiffness = { + "haa": 4000, + "hfe": 4000, + "kfe": 4000, + "pfe": 4000, + "pastern_to_foot": 4000, + "base_joint": 50, + } + damping = { + "haa": 250, + "hfe": 250, + "kfe": 250, + "pfe": 250, + "pastern_to_foot": 250, + "base_joint": 10, + } + ctrl_frequency = 250 # how often the PDF controller/action updates run + desired_sim_frequency = 500 # how often the physics is calculated + + class commands: + # * time before command are changed[s] + resampling_time = 3.0 + + class ranges: + lin_vel_x = [-1.0, 8.0] # min max [m/s] + lin_vel_y = 1.0 # max [m/s] + yaw_vel = 6 # max [rad/s] + height = [0.61, 1.30] # m + + class push_robots: + toggle = False + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.5, 5.0] + randomize_base_mass = False + added_mass_range = [-1.0, 1.0] + + class asset(LeggedRobotCfg.asset): + file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" + foot_name = "foot" + penalize_contacts_on = ["shank"] + terminate_after_contacts_on = ["base"] + end_effector_names = ["foot"] + collapse_fixed_joints = False + self_collisions = 1 + flip_visual_attachments = False + disable_gravity = False + disable_motors = False + joint_damping = 0.3 + fix_base_link = False + rotor_inertia = [0.002268] + 4 * ( + [0.002268, 0.002268, 0.005484, 0.005484, 0.005484] + ) + + class reward_settings(LeggedRobotCfg.reward_settings): + soft_dof_pos_limit = 0.9 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 600.0 + base_height_target = BASE_HEIGHT_REF + tracking_sigma = 0.25 + + class scaling(LeggedRobotCfg.scaling): + base_ang_vel = 0.3 + base_lin_vel = BASE_HEIGHT_REF + dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + (4 * [0.4, 1.3, 1.4, 2.5, 2.1]) + dof_pos_obs = dof_pos + dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used + commands = [3, 1, 3, 1] # add height as a command + + +class 
HorseRunnerCfg(LeggedRobotRunnerCfg): + seed = -1 + runner_class_name = "OnPolicyRunner" + + class actor(LeggedRobotRunnerCfg.actor): + hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + normalize_obs = True + actions = ["dof_pos_target"] + add_noise = True + disable_actions = False + + class noise: + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 + dof_vel = 0.005 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 + + class critic(LeggedRobotRunnerCfg.critic): + hidden_dims = [128, 64] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + normalize_obs = True + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.01 + orientation = 1.0 + torques = 5.0e-7 + dof_vel = 0.0 + min_base_height = 1.5 + action_rate = 1e-4 + action_rate2 = 1e-5 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + tracking_height = 1.5 + + class termination_weight: + termination = 0.01 + + class algorithm(LeggedRobotRunnerCfg.algorithm): + class algorithm: + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-4, 1e-2] + lr_ratio = 1.3 + + class runner(LeggedRobotRunnerCfg.runner): + run_name = "" + experiment_name = "horse" + max_iterations = 1000 + algorithm_class_name = "PPO2" + num_steps_per_env = 32 diff --git a/gym/envs/horse/horse_osc.py b/gym/envs/horse/horse_osc.py new file mode 100644 index 00000000..5c0ef2c1 --- /dev/null +++ b/gym/envs/horse/horse_osc.py @@ -0,0 +1,725 @@ +import torch +from isaacgym.torch_utils import torch_rand_float + +from gym.envs.horse.horse import Horse + +from isaacgym import gymtorch + +HORSE_WEIGHT = 536.38 * 9.81 # Weight of horse in Newtons +BASE_HEIGHT_REF = 1.3 + + +class HorseOsc(Horse): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _init_buffers(self): + super()._init_buffers() + + BASE = 0 + + RH = dict(haa=1, hfe=2, kfe=3, pfe=4, pastern=5) + LH = dict(haa=6, hfe=7, kfe=8, pfe=9, pastern=10) + RF = dict(haa=11, hfe=12, kfe=13, pfe=14, pastern=15) + LF = dict(haa=16, hfe=17, kfe=18, pfe=19, pastern=20) + + # tensors for vectorized ops + self.idx = { + "base": torch.tensor([BASE], device=self.device), + # all legs by joint type (RH, LH, RF, LF order) + "haa": torch.tensor( + [RH["haa"], LH["haa"], RF["haa"], LF["haa"]], device=self.device + ), + "hfe": torch.tensor( + [RH["hfe"], LH["hfe"], RF["hfe"], LF["hfe"]], device=self.device + ), + "kfe": torch.tensor( + [RH["kfe"], LH["kfe"], RF["kfe"], LF["kfe"]], device=self.device + ), + "pfe": torch.tensor( + [RH["pfe"], LH["pfe"], RF["pfe"], LF["pfe"]], device=self.device + ), + "pastern": torch.tensor( + [RH["pastern"], LH["pastern"], RF["pastern"], 
LF["pastern"]], + device=self.device, + ), + # hind vs front splits (HIND = RH, LH / FRONT = RF, LF) + "hind_haa": torch.tensor([RH["haa"], LH["haa"]], device=self.device), + "hind_hfe": torch.tensor([RH["hfe"], LH["hfe"]], device=self.device), + "hind_kfe": torch.tensor([RH["kfe"], LH["kfe"]], device=self.device), + "hind_pfe": torch.tensor([RH["pfe"], LH["pfe"]], device=self.device), + "hind_pastern": torch.tensor( + [RH["pastern"], LH["pastern"]], device=self.device + ), + "front_haa": torch.tensor([RF["haa"], LF["haa"]], device=self.device), + "front_hfe": torch.tensor([RF["hfe"], LF["hfe"]], device=self.device), + "front_kfe": torch.tensor([RF["kfe"], LF["kfe"]], device=self.device), + "front_pfe": torch.tensor([RF["pfe"], LF["pfe"]], device=self.device), + "front_pastern": torch.tensor( + [RF["pastern"], LF["pastern"]], device=self.device + ), + # legs (all joints within each leg) + "rh_leg": torch.tensor( + [RH["haa"], RH["hfe"], RH["kfe"], RH["pfe"], RH["pastern"]], + device=self.device, + ), + "lh_leg": torch.tensor( + [LH["haa"], LH["hfe"], LH["kfe"], LH["pfe"], LH["pastern"]], + device=self.device, + ), + "rf_leg": torch.tensor( + [RF["haa"], RF["hfe"], RF["kfe"], RF["pfe"], RF["pastern"]], + device=self.device, + ), + "lf_leg": torch.tensor( + [LF["haa"], LF["hfe"], LF["kfe"], LF["pfe"], LF["pastern"]], + device=self.device, + ), + # group by front or hind legs + "hind_legs": torch.tensor( + [ + RH["haa"], + RH["hfe"], + RH["kfe"], + RH["pfe"], + RH["pastern"], + LH["haa"], + LH["hfe"], + LH["kfe"], + LH["pfe"], + LH["pastern"], + ], + device=self.device, + ), + "front_legs": torch.tensor( + [ + RF["haa"], + RF["hfe"], + RF["kfe"], + RF["pfe"], + RF["pastern"], + LF["haa"], + LF["hfe"], + LF["kfe"], + LF["pfe"], + LF["pastern"], + ], + device=self.device, + ), + } + + self.prev_height_error = torch.zeros(self.num_envs, device=self.device) + self.lie_down_done = torch.zeros( + self.num_envs, dtype=torch.bool, device=self.device + ) + + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) + self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) + + self.oscillators_vel = torch.zeros_like(self.oscillators) + self.grf = torch.zeros(self.num_envs, 4, device=self.device) + self.osc_omega = self.cfg.osc.omega * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_coupling = self.cfg.osc.coupling * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_offset = self.cfg.osc.offset * torch.ones( + self.num_envs, 1, device=self.device + ) + + env_ids = torch.arange(self.num_envs, device=self.device, dtype=torch.long) + + self._reset_to_custom_pos(env_ids, self.root_states[env_ids, 2].clone()) + + # resample height command + h_min, h_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = h_min + (h_max - h_min) * torch.rand( + (len(env_ids),), device=self.device + ) + + def _reset_oscillators(self, env_ids): + if len(env_ids) == 0: + return + # * random + if self.cfg.osc.init_to == "random": + self.oscillators[env_ids] = torch_rand_float( + 0, + 2 * torch.pi, + shape=self.oscillators[env_ids].shape, + device=self.device, + ) + elif self.cfg.osc.init_to == "standing": + self.oscillators[env_ids] = 3 * torch.pi / 2 + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + + def _reset_system(self, env_ids): + if len(env_ids) == 0: + return + self._reset_oscillators(env_ids) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + # * keep some robots in 
the same starting state as they ended + timed_out_subset = (self.timed_out & ~self.terminated) * ( + torch.rand(self.num_envs, device=self.device) + < self.cfg.init_state.timeout_reset_ratio + ) + + env_ids = (self.terminated | timed_out_subset).nonzero().flatten() + if len(env_ids) == 0: + return + super()._reset_system(env_ids) + + # pose horse based set height + self._reset_to_custom_pos(env_ids, self.root_states[env_ids, 2].clone()) + + # resample height command + h_min, h_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = h_min + (h_max - h_min) * torch.rand( + (len(env_ids),), device=self.device + ) + + if len(env_ids) > 0: + h_cmd = self.commands[env_ids, 3].flatten() + h_now = self.base_height[env_ids].flatten() + self.prev_height_error[env_ids] = torch.abs(h_now - h_cmd) + self.lie_down_done[env_ids] = False + + def _pre_decimation_step(self): + super()._pre_decimation_step() + # self.grf = self._compute_grf() + self.compute_osc_slope() + + def compute_osc_slope(self): + cmd_x = torch.abs(self.commands[:, 0:1]) - self.cfg.osc.stop_threshold + stop = cmd_x < 0 + + self.osc_offset = stop * self.cfg.osc.offset + self.osc_omega = stop * self.cfg.osc.omega_stop + self.osc_coupling = stop * self.cfg.osc.coupling_stop + + self.osc_omega += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.omega_slope + self.cfg.osc.omega_step, + min=0.0, + max=self.cfg.osc.omega_max, + ) + self.osc_coupling += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.coupling_slope + self.cfg.osc.coupling_step, + min=0.0, + max=self.cfg.osc.coupling_max, + ) + + self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) + self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) + + def _post_decimation_step(self): + """Update all states that are not handled in PhysX""" + super()._post_decimation_step() + self.grf = self._compute_grf() + # self._step_oscillators() + + def _post_physx_step(self): + super()._post_physx_step() + self._step_oscillators(self.dt / self.cfg.control.decimation) + return None + + def _step_oscillators(self, dt=None): + if dt is None: + dt = self.dt + + local_feedback = self.osc_coupling * ( + torch.cos(self.oscillators) + self.osc_offset + ) + grf = self._compute_grf() + self.oscillators_vel = self.osc_omega - grf * local_feedback + self.oscillators_vel += ( + torch.randn(self.oscillators_vel.shape, device=self.device) + * self.cfg.osc.process_noise_std + ) + + self.oscillators_vel *= 2 * torch.pi + self.oscillators += self.oscillators_vel * dt + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + def _resample_commands(self, env_ids): + """Randommly select commands of some environments + + Args: + env_ids (List[int]): Environments ids for which new commands are needed + """ + if len(env_ids) == 0: + return + super()._resample_commands(env_ids) + possible_commands = torch.tensor( + self.command_ranges["lin_vel_x"], device=self.device + ) + self.commands[env_ids, 0:1] = possible_commands[ + torch.randint( + 0, len(possible_commands), (len(env_ids), 1), device=self.device + ) + ] + # add some gaussian noise to the commands + self.commands[env_ids, 0:1] += ( + torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var + ) + + if 0 in self.cfg.commands.ranges.lin_vel_x: + # * with 20% chance, reset to 0 commands except for forward + self.commands[env_ids, 1:] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + 
).unsqueeze(1) + # * with 20% chance, reset to 0 commands except for rotation + self.commands[env_ids, :2] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 10% chance, reset to 0 + self.commands[env_ids, :] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.9 + ).unsqueeze(1) + + if self.cfg.osc.randomize_osc_params: + self._resample_osc_params(env_ids) + + # resample height command + h_min, h_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = h_min + (h_max - h_min) * torch.rand( + (len(env_ids),), device=self.device + ) + + def _resample_osc_params(self, env_ids): + if len(env_ids) > 0: + self.osc_omega[env_ids, 0] = torch_rand_float( + self.cfg.osc.omega_range[0], + self.cfg.osc.omega_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_coupling[env_ids, 0] = torch_rand_float( + self.cfg.osc.coupling_range[0], + self.cfg.osc.coupling_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_offset[env_ids, 0] = torch_rand_float( + self.cfg.osc.offset_range[0], + self.cfg.osc.offset_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + + def perturb_base_velocity(self, velocity_delta, env_ids=None): + if env_ids is None: + env_ids = [range(self.num_envs)] + self.root_states[env_ids, 7:10] += velocity_delta + self.gym.set_actor_root_state_tensor( + self.sim, gymtorch.unwrap_tensor(self.root_states) + ) + + def _compute_grf(self, grf_norm=True): + grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) + if grf_norm: + return torch.clamp_max(grf / HORSE_WEIGHT, 1.0) + else: + return grf + + def _switch(self): + c_vel = torch.linalg.norm(self.commands, dim=1) + return torch.exp( + -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) + / self.cfg.reward_settings.switch_scale + ) + + def _reward_lin_vel_z(self): + """Penalize z axis base linear velocity with squared exp""" + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"][0]) + + def _reward_ang_vel_xy(self): + """Penalize xy axes base angular velocity""" + error = self._sqrdexp(self.base_ang_vel[:, :2] / self.scales["base_ang_vel"][0]) + return torch.sum(error, dim=1) + + # modified this method so joint index list uses horse 5 DOF legs, not 3 DOF + def _reward_cursorial(self): + """ + Encourage legs under body, joints stay neutral, avoid inverted/folded poses + Penalize key joints drifting from home. For a horse with 5 joints per leg, + we penalize (haa, hfe, kfe) for each of the 4 legs. 
+ Distal joint (pfe, pastern_to_foot) naturally flex a lot, shock absorbers + """ + legs = torch.tensor(list(range(1, 21)), device=self.device) + return -torch.mean(torch.square(self.dof_pos[:, legs]), dim=1) + + def _reward_swing_grf(self): + # Reward non-zero grf during swing (0 to pi) + rew = self.get_swing_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool) + return -torch.sum(rew, dim=1) + + def _reward_stance_grf(self): + # Reward non-zero grf during stance (pi to 2pi) + rew = self.get_stance_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool) + return torch.sum(rew, dim=1) + + def get_swing_grf(self, osc_bool=False, contact_bool=False): + if osc_bool: + phase = torch.lt(self.oscillators, torch.pi).int() + else: + phase = torch.maximum( + torch.zeros_like(self.oscillators), torch.sin(self.oscillators) + ) + if contact_bool: + return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold) + else: + return phase * self._compute_grf() + + def get_stance_grf(self, osc_bool=False, contact_bool=False): + if osc_bool: + phase = torch.gt(self.oscillators, torch.pi).int() + else: + phase = torch.maximum( + torch.zeros_like(self.oscillators), -torch.sin(self.oscillators) + ) + if contact_bool: + return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold) + else: + return phase * self._compute_grf() + + def _reward_coupled_grf(self): + """ + Multiply rewards for stance/swing grf, discount when undesirable + behavior (grf during swing, no grf during stance) + """ + swing_rew = self.get_swing_grf() + stance_rew = self.get_stance_grf() + combined_rew = self._sqrdexp(swing_rew * 2) + stance_rew + prod = torch.prod(torch.clip(combined_rew, 0, 1), dim=1) + return prod - torch.ones_like(prod) + + def _reward_dof_vel(self): + """Penalize dof velocities""" + return super()._reward_dof_vel() * self._switch() + + def _reward_dof_near_home(self): + return super()._reward_dof_near_home() * self._switch() + + def _reward_stand_still(self): + """Penalize any movement when at zero commands.""" + # normalize angles so we care about being within ~5 degrees + rew_pos = torch.mean( + self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1 + ) + rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) + rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) + rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) + return (rew_vel + rew_pos - rew_base_vel) * self._switch() + + def _reward_standing_torques(self): + """Penalize torques at zero commands""" + return super()._reward_torques() * self._switch() + + # * gait similarity scores + def angle_difference(self, theta1, theta2): + diff = torch.abs(theta1 - theta2) % (2 * torch.pi) + return torch.min(diff, 2 * torch.pi - diff) + + def _reward_tracking_height(self): + """Reward for base height.""" + # error between current and commanded height + error = self.base_height.flatten() - self.commands[:, 3].flatten() + error /= self.scales["base_height"] + + return self._sqrdexp(error) + + def _reward_tendon_constraints(self): + """ + Tendon-like coupling constraints (front/hind split): + - Desired KFE/PFE become dependant on HFE. + - Penalize deviation from those desired values. 
+ """ + + # --- helpers --- + def lerp(x, x0, x1, y0, y1): + """Linear interpolation y(x) between (x0,y0)->(x1,y1), with clamping.""" + # handle degenerate interval + denom = x1 - x0 + denom = torch.where(torch.abs(denom) < 1e-8, torch.ones_like(denom), denom) + t = (x - x0) / denom + t = torch.clamp(t, 0.0, 1.0) + return y0 + t * (y1 - y0) + + def piecewise_2seg(x, mid, x_lo, x_hi, y_lo, y_mid, y_hi): + """ + Piecewise linear passing through: + (x=mid, y=y_mid) + and reaching: + (x=x_lo, y=y_lo) for x <= mid + (x=x_hi, y=y_hi) for x >= mid + """ + y_left = lerp( + x, mid, x_lo, y_mid, y_lo + ) # x in [x_lo, mid] (or beyond -> clamped) + y_right = lerp( + x, mid, x_hi, y_mid, y_hi + ) # x in [mid, x_hi] (or beyond -> clamped) + return torch.where(x <= mid, y_left, y_right) + + # make desired match (N,2) like the actual joint tensor + def _match_legs(desired, actual_legs): + # actual_legs: (N,2) + if desired.dim() == 0: + return desired.view(1, 1).expand_as(actual_legs) + if desired.dim() == 1: + return desired.unsqueeze(1).expand_as(actual_legs) # (N,) -> (N,2) + if desired.dim() == 2: + # if already (N,2) it's good + return desired + raise RuntimeError(f"Unexpected desired shape: {tuple(desired.shape)}") + + # Hind constraints (RH, LH) + hfe_hind = self.dof_pos[:, self.idx["hind_hfe"]] # [N,2] + kfe_hind = self.dof_pos[:, self.idx["hind_kfe"]] + pfe_hind = self.dof_pos[:, self.idx["hind_pfe"]] + + # 3) hind hfe: 0 -> -1.5 => kfe: 0 -> +1.0, pfe: 0 -> -1.2 + # 4) hind hfe: 0 -> +0.5 => kfe: 0 -> -0.2, pfe: 0 -> -0.5 + kfe_hind_des = piecewise_2seg( + hfe_hind, + mid=torch.zeros_like(hfe_hind), # at hfe=0 + x_lo=-1.5 * torch.ones_like(hfe_hind), + x_hi=+0.5 * torch.ones_like(hfe_hind), + y_lo=+1.0 * torch.ones_like(hfe_hind), + y_mid=0.0 * torch.ones_like(hfe_hind), + y_hi=-0.2 * torch.ones_like(hfe_hind), + ) + + pfe_hind_des = piecewise_2seg( + hfe_hind, + mid=torch.zeros_like(hfe_hind), + x_lo=-1.5 * torch.ones_like(hfe_hind), + x_hi=+0.5 * torch.ones_like(hfe_hind), + y_lo=-1.2 * torch.ones_like(hfe_hind), + y_mid=0.0 * torch.ones_like(hfe_hind), + y_hi=-0.5 * torch.ones_like(hfe_hind), + ) + + # Front constraints (RF, LF) + hfe_front = self.dof_pos[:, self.idx["front_hfe"]] # [N,2] + kfe_front = self.dof_pos[:, self.idx["front_kfe"]] + pfe_front = self.dof_pos[:, self.idx["front_pfe"]] + + # 5) front hfe: 0 -> +0.6 => kfe: 0 -> 0, pfe: 0 -> 0 + # 6) front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +1.5 + # 7) front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +3.0 + + zeros = torch.zeros_like(hfe_front) + ones = torch.ones_like(hfe_front) + + # kfe: flat 0 for hfe in [0, +0.6], ramp to -1.5 for hfe in [-1.0, 0] + kfe_front_des = piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=-1.5 * ones, # at hfe=-1.0 + y_mid=0.0 * ones, # at hfe=0 + y_hi=0.0 * ones, # at hfe=+0.6 + ) + + # pfe: flat 0 for hfe in [0, +0.6], ramp to +1.5 for hfe in [-1.0, 0] + pfe_front_des_15 = piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=+1.5 * ones, # at hfe=-1.0 + y_mid=0.0 * ones, # at hfe=0 + y_hi=0.0 * ones, # at hfe=+0.6 + ) + + # pfe: flat 0 for hfe in [0, +0.6], ramp to +3.0 for hfe in [-1.0, 0] + pfe_front_des_30 = piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=+3.0 * ones, # at hfe=-1.0 + y_mid=0.0 * ones, # at hfe=0 + y_hi=0.0 * ones, # at hfe=+0.6 + ) + + # --- HIND --- + kfe_hind_des_ = _match_legs(kfe_hind_des, kfe_hind) # -> (N,2) + pfe_hind_des_ = _match_legs(pfe_hind_des, 
pfe_hind) # -> (N,2) + hind_pen = (kfe_hind - kfe_hind_des_) ** 2 + ( + pfe_hind - pfe_hind_des_ + ) ** 2 # (N,2) + + # --- FRONT --- + # KFE: flat 0 for hfe in [0, 0.6], ramp to -1.5 for hfe in [-1,0] + kfe_front_des_ = _match_legs(kfe_front_des, kfe_front) # -> (N,2) + kfe_err = (kfe_front - kfe_front_des_) ** 2 # (N,2) + + # negative HFE: + # - PFE: 0 -> +1.5 (hfe 0 -> -1) + # - PFE: 0 -> +3.0 (hfe 0 -> -1) + pfe_front_des15_ = _match_legs(pfe_front_des_15, pfe_front) # -> (N,2) + pfe_front_des30_ = _match_legs(pfe_front_des_30, pfe_front) # -> (N,2) + + pfe_err15 = (pfe_front - pfe_front_des15_) ** 2 + pfe_err30 = (pfe_front - pfe_front_des30_) ** 2 + + # BOTH branches acceptable always: + pfe_term = torch.minimum(pfe_err15, pfe_err30) # (N,2) + + front_pen = kfe_err + pfe_term # (N,2) + + # --- aggregate per env --- + pen = hind_pen.mean(dim=1) + front_pen.mean(dim=1) # (N,) + return -pen + + def _reset_to_custom_pos(self, env_ids, reset_h): + env_ids = env_ids.to(dtype=torch.long, device=self.device) + B = env_ids.numel() + if B == 0: + return + + # --- normalize reset_h to (B,) on device --- + if not torch.is_tensor(reset_h): + reset_h = torch.tensor( + reset_h, device=self.device, dtype=torch.float32 + ).repeat(B) + else: + reset_h = reset_h.to(device=self.device, dtype=torch.float32) + if reset_h.numel() == 1: + reset_h = reset_h.repeat(B) + elif reset_h.shape[0] != B: + # if passed full (num_envs,), subset it + if reset_h.shape[0] == self.num_envs: + reset_h = reset_h[env_ids] + else: + raise RuntimeError( + f"reset_h shape {reset_h.shape} incompatible env_id batch {B}" + ) + + # env_ids must be LongTensor on device + env_ids = env_ids.to(device=self.device, dtype=torch.long) + + # Get a per-env base pose, regardless of how default_dof_pos is stored + ddp = self.default_dof_pos + + if ddp.dim() == 1: + # (num_dofs,) -> make (B, num_dofs) + base_pose = ddp.unsqueeze(0).repeat(env_ids.numel(), 1).clone() + elif ddp.dim() == 2 and ddp.shape[0] == 1: + # (1, num_dofs) -> expand to (B, num_dofs) + base_pose = ddp.expand(env_ids.numel(), -1).clone() + elif ddp.dim() == 2 and ddp.shape[0] == self.num_envs: + # (num_envs, num_dofs) -> index by env ids + base_pose = ddp[env_ids].clone() + else: + raise RuntimeError(f"Unexpected default_dof_pos shape: {tuple(ddp.shape)}") + + # Decide stand vs lie by z pos + h_mid = 0.7 + + use_stand = reset_h >= h_mid # (B,) bool + + stand_pose = base_pose.clone() + lie_pose = base_pose.clone() + + # DOF map: 0 base_joint + # RH: 1..5, LH: 6..10, RF: 11..15, LF: 16..20 (haa,hfe,kfe,pfe,pastern) + + pos_range = self.cfg.init_state.dof_pos_range + + for base in [1, 6, 11, 16]: + haa = base + 0 + hfe = base + 1 + kfe = base + 2 + pfe = base + 3 + pas = base + 4 + + # standing (tune) + stand_pose[:, haa] = pos_range["haa"][0] + ( + pos_range["haa"][1] - pos_range["haa"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, hfe] = pos_range["hfe"][0] + ( + pos_range["hfe"][1] - pos_range["hfe"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, kfe] = pos_range["kfe"][0] + ( + pos_range["kfe"][1] - pos_range["kfe"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, pfe] = pos_range["pfe"][0] + ( + pos_range["pfe"][1] - pos_range["pfe"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, pas] = pos_range["pastern_to_foot"][0] + ( + pos_range["pastern_to_foot"][1] - pos_range["pastern_to_foot"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + + 
# front kneel (tune) + for base in [11, 16]: + haa = base + 0 + hfe = base + 1 + kfe = base + 2 + pfe = base + 3 + pas = base + 4 + lie_pose[:, haa] = 0.0 + lie_pose[:, hfe] = -1.0 + (0.2 - -1.0) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, kfe] = -1.5 + (-0.5 - -1.5) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pfe] = -0.3 + (3.0 - -0.3) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pas] = -0.3 + (1.8 - -0.3) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + + # hind tuck (tune) + for base in [1, 6]: + haa = base + 0 + hfe = base + 1 + kfe = base + 2 + pfe = base + 3 + pas = base + 4 + lie_pose[:, haa] = 0.0 + lie_pose[:, hfe] = -1.5 + (-1 - -1.5) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, kfe] = -0.2 + (1 - -0.2) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pfe] = 0.0 + (-1.2 - 0.0) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pas] = -0.3 + (1.8 - -0.3) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + + new_pose = torch.where(use_stand.unsqueeze(1), stand_pose, lie_pose) + + # clamp to limits (dof_pos_limits: (num_dof, 2)) + lo = self.dof_pos_limits[:, 0].unsqueeze(0) + hi = self.dof_pos_limits[:, 1].unsqueeze(0) + new_pose = torch.max(torch.min(new_pose, hi), lo) + + # apply to buffers + self.dof_pos[env_ids] = new_pose + self.dof_vel[env_ids] = 0.0 + + if ( + hasattr(self, "dof_state") + and self.dof_state.dim() == 3 + and self.dof_state.shape[2] >= 2 + ): + self.dof_state[env_ids, :, 0] = new_pose + self.dof_state[env_ids, :, 1] = 0.0 diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py new file mode 100644 index 00000000..436af043 --- /dev/null +++ b/gym/envs/horse/horse_osc_config.py @@ -0,0 +1,312 @@ +from gym.envs.horse.horse_config import ( + HorseCfg, + HorseRunnerCfg, +) + +BASE_HEIGHT_REF = 1.3 + + +class HorseOscCfg(HorseCfg): + class env(HorseCfg.env): + num_envs = 2**12 + num_actuators = 1 + 4 * 5 + episode_length_s = 10 + + class terrain(HorseCfg.terrain): + mesh_type = "plane" + # mesh_type = 'trimesh' # none, plane, heightfield or trimesh + + class init_state(HorseCfg.init_state): + timeout_reset_ratio = 0.75 + reset_mode = "reset_to_range" + # * default COM for basic initialization + pos = [0.0, 0.0, 1.35] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + default_joint_angles = { + "haa": 0.0, + "hfe": 0.0, + "kfe": 0.0, + "pfe": 0.0, + "pastern_to_foot": 0.0, + "base_joint": 0.0, + } + + # * initialization for random range setup + # these are the physical limits in the URDF as of 11 Dec 2025 + # dof_pos_range = { + # haa": [-0.2, 0.2], + # f_hfe": [-1.0, 0.6], + # h_hfe": [-1.5, 0.5], + # f_kfe": [-1.5, 0.1], + # h_kfe": [-0.2, 1.0], + # f_pfe": [-0.3, 3.0], + # h_pfe": [-1.2, 2.5], + # f_pastern_to_foot": [-0.3, 1.8], + # h_pastern_to_foot": [-0.3, 1.8], + # base_joint": [-0.2, 0.2], + # } + dof_pos_range = { + "haa": [-0.2, 0.2], + "hfe": [-1.0, 0.5], + "kfe": [-0.2, 0.1], + "pfe": [-0.3, 2.5], + "pastern_to_foot": [-0.3, 1.8], + "base_joint": [-0.0, 0.0], + } + dof_vel_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.2, 0.2], + "kfe": [-0.2, 0.2], + "pfe": [-0.2, 0.2], + "pastern_to_foot": [-0.2, 0.2], + "base_joint": [-0.2, 0.2], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [0.6, 1.35], # z + [-0.0, 0.0], # roll + 
[-0.0, 0.0], # pitch + [-0.2, 0.2], # yaw + ] + root_vel_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [0.0, 0.0], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(HorseCfg.control): + # * PD Drive parameters: + # HorseOscCfg.control + stiffness = { + "haa": 75, + "hfe": 40, + "kfe": 38, + "pfe": 10, + "pastern_to_foot": 5, + "base_joint": 50, # still needs modifying + } + damping = { + "haa": 0.0001, + "hfe": 0.0001, + "kfe": 0.0001, + "pfe": 0.5, + "pastern_to_foot": 0.5, + "base_joint": 10, + } + ctrl_frequency = 250 # how often the PDF controller/action updates run + desired_sim_frequency = 1000 # how often the physics is calculated + + class osc: # <-------------------most likely needs tuning + process_noise_std = 0.25 + grf_threshold = 0.1 # 20. # Normalized to body weight + # oscillator parameters + omega = 3 # gets overwritten + coupling = 1 # gets overwritten + osc_bool = False # not used in paper + grf_bool = False # not used in paper + randomize_osc_params = False + omega_range = [1.0, 4.0] # [0.0, 10.] + coupling_range = [0.0, 1.0] + offset_range = [0.0, 0.0] + stop_threshold = 0.5 + omega_stop = 1.0 + omega_step = 2.0 + omega_slope = 1.0 + omega_max = 4.0 + # coupling_step = 0. + # coupling_stop = 0. + coupling_stop = 4.0 + coupling_step = 1.0 + coupling_slope = 0.0 + coupling_max = 1.0 + offset = 1.0 + + init_to = "random" + + class commands: + resampling_time = 3.0 # * time before command are changed[s] + var = 1.0 + + class ranges: + lin_vel_x = [-1.0, 0.0, 1.0, 3.0, 6.0] # min max [m/s] + lin_vel_y = 1.0 # max [m/s] + yaw_vel = 6 # max [rad/s] + height = [0.0, 1.30] # m, command range of height during training + + class push_robots: + toggle = False + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.4, 1.0] + randomize_base_mass = False + lower_mass_offset = -0.5 # kg + upper_mass_offset = 2.0 + lower_z_offset = 0.0 # m + upper_z_offset = 0.2 + lower_x_offset = 0.0 + upper_x_offset = 0.0 + + class asset(HorseCfg.asset): + shank_length_diff = 0 # Units in cm + file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" + foot_name = "foot" + penalize_contacts_on = ["thigh"] # "thigh", "shank", "pastern" + terminate_after_contacts_on = ["top"] + collapse_fixed_joints = False + fix_base_link = False + self_collisions = 1 + flip_visual_attachments = False + disable_gravity = False + disable_motors = False + joint_damping = 0.3 + + class reward_settings(HorseCfg.reward_settings): + soft_dof_pos_limit = 0.9 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 2000.0 # testing, this was too low for a horse weight + base_height_target = BASE_HEIGHT_REF + 0.03 + tracking_sigma = 0.25 + switch_scale = 0.5 + + class scaling(HorseCfg.scaling): + base_ang_vel = [0.3, 0.3, 0.1] + base_lin_vel = BASE_HEIGHT_REF + dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + ( + 4 * [0.4, 1.3, 1.4, 2.5, 2.1] + ) # reducing this to be alot smaller 2.1 or 2.5 + dof_pos_obs = dof_pos + # dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + dof_pos_target = [0.4] + (4 * [0.8, 2.6, 2.8, 5.0, 4.2]) + tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used + commands = [3, 1, 3, 1] # add height as a command + + +class HorseOscRunnerCfg(HorseRunnerCfg): + seed = -1 + runner_class_name = "OnPolicyRunner" + + class actor(HorseRunnerCfg.actor): + 
hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "dof_pos_target", + # "osc_omega", + # "osc_coupling" + # "oscillators_vel", + # "grf", + ] + normalize_obs = True + actions = ["dof_pos_target"] + add_noise = True + disable_actions = False + + class noise: + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 + dof_vel = 0.005 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 + + class critic(HorseRunnerCfg.critic): + hidden_dims = [128, 64] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "oscillators_vel", + "dof_pos_target", + ] + normalize_obs = True + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.01 + orientation = 1.0 + torques = 5.0e-10 + dof_vel = 0.0 + # min_base_height = 1.0 + action_rate = 1e-5 + action_rate2 = 1e-6 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + swing_grf = 5.0 + stance_grf = 5.0 + swing_velocity = 0.0 + stance_velocity = 0.0 + coupled_grf = 1.0 # penalize for grf during swing, no grf during stance + enc_pace = 0.0 + cursorial = 1.0 # enourage legs to stay under body, don't splay out + standing_torques = 0.0 # 1.e-5 + tracking_height = 5.0 + tendon_constraints = 1.0 + + class termination_weight: + termination = 15.0 / 100.0 + + class algorithm: + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-4, 1e-2] + lr_ratio = 1.3 + + class runner(HorseRunnerCfg.runner): + run_name = "" + experiment_name = "horse_osc" + max_iterations = 1000 + algorithm_class_name = "PPO2" + num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 3dc1507f..7608e080 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -119,7 +119,7 @@ class scaling(LeggedRobotCfg.scaling): dof_pos_obs = dof_pos dof_pos_target = 4 * [0.2, 0.3, 0.3] tau_ff = 4 * [18, 18, 28] - commands = [3, 1, 3] + commands = [3, 1, 3, 1] class MiniCheetahRunnerCfg(LeggedRobotRunnerCfg): diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc.py b/gym/envs/mini_cheetah/mini_cheetah_osc.py index 77866ca3..f2b64409 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc.py @@ -1,5 +1,4 @@ import torch -import numpy as np from isaacgym.torch_utils import torch_rand_float from gym.envs.mini_cheetah.mini_cheetah import MiniCheetah @@ -129,31 +128,6 @@ def compute_osc_slope(self): self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) - def _process_rigid_body_props(self, props, env_id): - if env_id == 0: - # * init buffers for the domain rand changes - self.mass = torch.zeros(self.num_envs, 1, device=self.device) - self.com = 
torch.zeros(self.num_envs, 3, device=self.device) - - # * randomize mass - if self.cfg.domain_rand.randomize_base_mass: - lower = self.cfg.domain_rand.lower_mass_offset - upper = self.cfg.domain_rand.upper_mass_offset - # self.mass_ - props[0].mass += np.random.uniform(lower, upper) - self.mass[env_id] = props[0].mass - # * randomize com position - lower = self.cfg.domain_rand.lower_z_offset - upper = self.cfg.domain_rand.upper_z_offset - props[0].com.z += np.random.uniform(lower, upper) - self.com[env_id, 2] = props[0].com.z - - lower = self.cfg.domain_rand.lower_x_offset - upper = self.cfg.domain_rand.upper_x_offset - props[0].com.x += np.random.uniform(lower, upper) - self.com[env_id, 0] = props[0].com.x - return props - def _post_decimation_step(self): """Update all states that are not handled in PhysX""" super()._post_decimation_step() @@ -174,19 +148,13 @@ def _step_oscillators(self, dt=None): ) grf = self._compute_grf() self.oscillators_vel = self.osc_omega - grf * local_feedback - # self.oscillators_vel *= torch_rand_float(0.9, - # 1.1, - # self.oscillators_vel.shape, - # self.device) self.oscillators_vel += ( torch.randn(self.oscillators_vel.shape, device=self.device) * self.cfg.osc.process_noise_std ) self.oscillators_vel *= 2 * torch.pi - self.oscillators += ( - self.oscillators_vel * dt - ) # torch.clamp(self.oscillators_vel * dt, min=0) + self.oscillators += self.oscillators_vel * dt self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) self.oscillator_obs = torch.cat( (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 @@ -214,17 +182,6 @@ def _resample_commands(self, env_ids): torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var ) - # possible_commands = torch.tensor(self.command_ranges["lin_vel_y"], - # device=self.device) - # self.commands[env_ids, 1:2] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - # possible_commands = torch.tensor(self.command_ranges["yaw_vel"], - # device=self.device) - # self.commands[env_ids, 0:1] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - if 0 in self.cfg.commands.ranges.lin_vel_x: # * with 20% chance, reset to 0 commands except for forward self.commands[env_ids, 1:] *= ( diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py index 2c32f06b..e8796147 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py @@ -163,7 +163,7 @@ class scaling(MiniCheetahCfg.scaling): dof_pos_target = 4 * [0.2, 0.3, 0.3] tau_ff = 4 * [18, 18, 28] # hip-abad, hip-pitch, knee # commands = [base_lin_vel, base_lin_vel, base_ang_vel] - commands = [3, 1, 3] # [base_lin_vel, base_lin_vel, base_ang_vel] + commands = [3, 1, 3, 1] # [base_lin_vel, base_lin_vel, base_ang_vel] class MiniCheetahOscRunnerCfg(MiniCheetahRunnerCfg): diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref.py b/gym/envs/mini_cheetah/mini_cheetah_ref.py index 57e1f0e7..a1a3ca13 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref.py @@ -59,8 +59,7 @@ def _switch(self): def _reward_swing_grf(self): """Reward non-zero grf during swing (0 to pi)""" in_contact = torch.gt( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), - 50.0, + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), 50.0 ) ph_off = torch.lt(self.phase, torch.pi) rew = 
in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) @@ -69,8 +68,7 @@ def _reward_stance_grf(self): """Reward non-zero grf during stance (pi to 2pi)""" in_contact = torch.gt( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), - 50.0, + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), 50.0 ) ph_off = torch.gt(self.phase, torch.pi) # should this be in swing? rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) diff --git a/gym/utils/interfaces/KeyboardInterface.py b/gym/utils/interfaces/KeyboardInterface.py index d2e3ce99..ce740f50 100644 --- a/gym/utils/interfaces/KeyboardInterface.py +++ b/gym/utils/interfaces/KeyboardInterface.py @@ -15,6 +15,10 @@ def __init__(self, env): env.gym.subscribe_viewer_keyboard_event( env.viewer, gymapi.KEY_SPACE, "space_shoot" ) + env.gym.subscribe_viewer_keyboard_event(env.viewer, gymapi.KEY_UP, "height_up") + env.gym.subscribe_viewer_keyboard_event( + env.viewer, gymapi.KEY_DOWN, "height_down" + ) env.gym.subscribe_viewer_mouse_event( env.viewer, gymapi.MOUSE_LEFT_BUTTON, "mouse_shoot" ) @@ -24,6 +28,7 @@ def __init__(self, env): print("WASD: forward, strafe left, backward, strafe right") print("QE: yaw left/right") print("R: reset environments") + print("Arrow Up/Down: stand up/lay down") print("ESC: quit") print("______________________________________________________________") @@ -39,6 +44,10 @@ def __init__(self, env): self.max_vel_yaw = 2.0 self.increment_yaw = self.max_vel_yaw * 0.2 + self.min_height = -1000 + self.max_height = 1.3 + self.increment_height = 0.1 + def update(self, env): for evt in env.gym.query_viewer_action_events(env.viewer): if evt.value == 0: @@ -73,6 +82,16 @@ def update(self, env): env.commands[:, 2] + self.increment_yaw, max=self.max_vel_yaw, ) + elif evt.action == "height_up": + env.commands[:, 3] = torch.clamp( + env.commands[:, 3] + self.increment_height, + max=self.max_height, + ) + elif evt.action == "height_down": + env.commands[:, 3] = torch.clamp( + env.commands[:, 3] - self.increment_height, + min=self.min_height, + ) elif evt.action == "QUIT": env.exit = True elif evt.action == "RESET": diff --git a/resources/robots/horse/urdf/horse.urdf b/resources/robots/horse/urdf/horse.urdf new file mode 100644 index 00000000..22d3eb3f --- /dev/null +++ b/resources/robots/horse/urdf/horse.urdf @@ -0,0 +1,605 @@ [605 added lines: URDF XML markup not preserved in this extract] \ No newline at end of file
diff --git a/scripts/analyze_joint_scaling.py b/scripts/analyze_joint_scaling.py new file mode 100644 index 00000000..1de90d03 --- /dev/null +++ b/scripts/analyze_joint_scaling.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import numpy as np +import os +import matplotlib.pyplot as plt +import csv + +OUT_DIR = "scaling_analysis" +os.makedirs(OUT_DIR, exist_ok=True) + +LOG_FILE = "joint_logs.npz" + +# load data +d = np.load(LOG_FILE, allow_pickle=True) + +actual = d["actual_pos"] # [envs, steps, joints] +target = d["target_pos"] +joint_names = d["joint_name"] + +num_envs, total_steps, num_joints = actual.shape + +# episode length +if "step" in d: + step_arr = d["step"] + valid = np.where(step_arr != 0)[0] + actual_steps = valid[-1] + 1 if len(valid) > 0 else 1 +else: + tmp = target[0] + nz = np.where(np.any(tmp != 0, axis=1))[0] + actual_steps = nz[-1] + 1 + +print(f"Trimming timestep dimension from {total_steps} -> {actual_steps}") + +# trim arrays +actual = actual[:, :actual_steps, :] +target = target[:, :actual_steps, :] + +# raw error +error = target - actual # [envs, steps, joints] +abs_error = np.abs(error) + +# shape: (num_envs * steps, num_joints) +flat_err = abs_error.reshape(-1, num_joints) + +# mean absolute error for each joint +mae = np.mean(flat_err, axis=0) + +# standard deviation of absolute error for each joint +std_err = np.std(flat_err, axis=0) + +# csv stats +csv_path = os.path.join(OUT_DIR, "error_summary.csv") +with open(csv_path, "w", newline="") as f: + w = csv.writer(f) + w.writerow(["joint_idx", "joint_name", "MAE_all_envs", "STD_all_envs"]) + for j in range(num_joints): + w.writerow( + [ + j, + joint_names[j], + float(mae[j]), + float(std_err[j]), + ] + ) + +print(f"Saved raw error summary (all envs): {csv_path}") + +# histogram with all env +all_errors = error.reshape(-1, num_joints) + +for j in range(num_joints): + name = joint_names[j] + plt.figure(figsize=(10, 4)) + plt.hist(all_errors[:, j], bins=80) + plt.title(f"{name} – Raw Error Histogram (All Envs Combined)") + plt.xlabel("target_pos - actual_pos") + plt.ylabel("count") + plt.grid(True, alpha=0.3) + + out = os.path.join(OUT_DIR, f"{j:02d}_{name}_hist.png") + plt.savefig(out, dpi=200) + plt.close() + +print(f"Saved histograms in: {OUT_DIR}") diff --git a/scripts/play.py b/scripts/play.py index 833e6641..46d3bb46 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -5,6 +5,10 @@ # torch needs to be imported after isaacgym imports in local source import torch +import numpy as np +import os + +BASE_HEIGHT_REF = 1.3 def setup(args): @@ -29,7 +33,120 @@ def setup(args): return env, runner, train_cfg +def create_obs_logging_dict(env, obs_vars, num_steps): + """ + Create a dictionary to log raw and scaled observation data. + + Returns: + obs_log: dict with keys: + {var}_raw -> [num_envs, num_steps, ...] + {var}_scaled -> [num_envs, num_steps, ...] + """ + num_envs = env.num_envs + obs_log = {} + + for var in obs_vars: + val = getattr(env, var) + shape = (num_envs, num_steps) + val.shape[1:] # preserve timestep array shape + + # allocate tensors + obs_log[f"{var}_raw"] = torch.zeros(shape, device=val.device) + obs_log[f"{var}_scaled"] = torch.zeros_like(obs_log[f"{var}_raw"]) + + return obs_log + + +def log_obs_step(env, obs_log, obs_vars, step_idx): + for var in obs_vars: + obs_log[f"{var}_raw"][:, step_idx, ...] = getattr(env, var) + obs_log[f"{var}_scaled"][:, step_idx, ...]
= env.get_state(var) + + +def create_logging_dict(env, num_steps): + # creates a dictionary of tensors to store joint data for each timestep. + num_envs = env.num_envs + num_dofs = env.num_dof + + log_dict = { + "step": torch.zeros((num_steps,), dtype=torch.int32, device=env.device), + "target_pos": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + "actual_pos": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + "torque": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + } + + # Get joint names + joint_names = ( + env.dof_names + if hasattr(env, "dof_names") + else [f"joint_{i}" for i in range(num_dofs)] + ) + log_dict["joint_name"] = joint_names + + return log_dict + + def play(env, runner, train_cfg): + num_steps = int(env.max_episode_length) + log_data = create_logging_dict(env, num_steps) + + obs_vars = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + + obs_log = create_obs_logging_dict(env, obs_vars, num_steps) + + # --- height reward logging --- + height_rew = np.zeros((num_steps,), dtype=np.float32) + height_actual = np.zeros((num_steps,), dtype=np.float32) + height_target = np.zeros((num_steps,), dtype=np.float32) + + # ---------- record init pos/height ---------- + OUT_DIR = "play_logs" + os.makedirs(OUT_DIR, exist_ok=True) + + # num dofs (don't rely on env.num_dofs, use tensor shape) + num_dofs = env.dof_pos.shape[1] + + # Capture *one-time* init snapshot for all envs + # root z: prefer root_states[:,2] if it exists; otherwise fall back to base_height + if hasattr(env, "root_states"): + init_root_z = env.root_states[:, 2].detach().cpu().numpy().astype(np.float32) + else: + init_root_z = env.base_height.detach().cpu().numpy().astype(np.float32) + + # commanded height at start + init_cmd_h = env.commands[:, 3].detach().cpu().numpy().astype(np.float32) + # starting joint positions + init_dof_pos = env.dof_pos.detach().cpu().numpy().astype(np.float32) + + # Build a single 2D table: (num_envs, 2 + num_dofs) + # col0 = init_root_z, col1 = init_cmd_h, col2.. 
= dof_pos + init_table = np.zeros((env.num_envs, 2 + num_dofs), dtype=np.float32) + init_table[:, 0] = init_root_z + init_table[:, 1] = init_cmd_h + init_table[:, 2:] = init_dof_pos + + # Column names/joint names + if hasattr(env, "dof_names"): + dof_names = list(env.dof_names) + else: + # fallback: generic names + dof_names = [f"dof_{i}" for i in range(num_dofs)] + + col_names = ["init_root_z", "init_cmd_height"] + [f"init_{n}" for n in dof_names] + + # track actual number of simulation steps + actual_steps = 0 + # track and print commanded height changes + last_height_cmd = None + # * set up recording if env.cfg.viewer.record: recorder = VisualizationRecorder( @@ -41,18 +158,104 @@ def play(env, runner, train_cfg): if COMMANDS_INTERFACE: # interface = GamepadInterface(env) interface = KeyboardInterface(env) - for i in range(10 * int(env.max_episode_length)): - if COMMANDS_INTERFACE: - interface.update(env) - if env.cfg.viewer.record: - recorder.update(i) - runner.set_actions( - runner.actor_cfg["actions"], - runner.get_inference_actions(), - runner.actor_cfg["disable_actions"], + + # resample height command only + h_min, h_max = env.cfg.commands.ranges.height + env.commands[:, 3] = h_min + (h_max - h_min) * torch.rand( + (env.num_envs,), device=env.device ) - env.step() - env.check_exit() + + try: + for i in range(10 * int(env.max_episode_length)): + if COMMANDS_INTERFACE: + interface.update(env) + if env.cfg.viewer.record: + recorder.update(i) + + # print target/actual height (m) + target_height = env.commands[0, 3].item() + if (last_height_cmd is None) or ( + abs(target_height - last_height_cmd) > 1e-6 + ): + actual_height = env.base_height[0].item() + + print( + f"[HEIGHT] actual = {actual_height:.3f} m | " + f"target = {target_height:.3f} m | " + f"error = {actual_height - target_height:.3f} | " + f"descend mode = {(env.commands[:, 3][0]) < 1.0}" + ) + last_height_cmd = target_height + + runner.set_actions( + runner.actor_cfg["actions"], + runner.get_inference_actions(), + runner.actor_cfg["disable_actions"], + ) + env.step() + + # log only actual simulation steps + if actual_steps < num_steps: + # log joints + log_data["step"][actual_steps] = actual_steps + log_data["target_pos"][:, actual_steps, :] = env.dof_pos_target + log_data["actual_pos"][:, actual_steps, :] = env.dof_pos + log_data["torque"][:, actual_steps, :] = env.torques + + # log observations + log_obs_step(env, obs_log, obs_vars, actual_steps) + + # _reward_tracking_height returns (num_envs,) tensor + r = env._reward_tracking_height() # torch tensor + height_rew[actual_steps] = r[0].item() + height_actual[actual_steps] = env.base_height[0].item() + height_target[actual_steps] = env.commands[0, 3].item() + + actual_steps += 1 + + env.check_exit() # user exit or viewer closed + + except KeyboardInterrupt: + print("\n[INFO] Interrupted by user, saving logs...") + except SystemExit: + print("\n[INFO] Viewer closed, saving logs...") + finally: + # slice to actual steps before saving + log_data_cpu = { + k: (v.detach().cpu().numpy() if torch.is_tensor(v) else v) + for k, v in log_data.items() + } + for key in ["step", "target_pos", "actual_pos", "torque"]: + log_data_cpu[key] = log_data_cpu[key][:actual_steps] + + np.savez_compressed("joint_logs.npz", **log_data_cpu) + print(f"\nSaved joint log to joint_logs.npz ({actual_steps} steps)") + + obs_log_cpu = { + k: (v.detach().cpu().numpy() if torch.is_tensor(v) else v) + for k, v in obs_log.items() + } + for key in obs_log_cpu.keys(): + obs_log_cpu[key] = obs_log_cpu[key][:, 
:actual_steps, ...] + + np.savez_compressed("obs_logs.npz", **obs_log_cpu) + print(f"\nSaved obs log to obs_logs.npz ({actual_steps} steps)") + + # ---------- save init data ---------- + init_npz_path = os.path.join(OUT_DIR, "init_snapshot.npz") + np.savez_compressed( + init_npz_path, + init_table=init_table, + col_names=np.array(col_names, dtype=object), + ) + print(f"[saved] init snapshot -> {init_npz_path}") + + # save as csv + init_csv_path = os.path.join(OUT_DIR, "init_snapshot.csv") + with open(init_csv_path, "w") as f: + f.write(",".join(col_names) + "\n") + np.savetxt(f, init_table, delimiter=",", fmt="%.6f") + print(f"[saved] init snapshot csv -> {init_csv_path}") if __name__ == "__main__": diff --git a/scripts/plot_height_rewards.py b/scripts/plot_height_rewards.py new file mode 100644 index 00000000..967bf178 --- /dev/null +++ b/scripts/plot_height_rewards.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import numpy as np +import matplotlib.pyplot as plt + +LOG_FILE = "height_reward_logs.npz" +OUT_PNG = "height_reward_plot.png" + +d = np.load(LOG_FILE) +step = d["step"] +height_rew = d["height_rew"] +h_actual = d["height_actual"] +h_target = d["height_target"] + +plt.figure(figsize=(12, 5)) + +plt.plot(step, h_actual, label="actual height (m)") +plt.plot(step, h_target, label="target height (m)") +plt.plot(step, height_rew, label="_reward_tracking_height") + +plt.xlabel("step") +plt.ylabel("value") +plt.title("Height Tracking + Reward (Same Axis)") +plt.grid(True, alpha=0.3) +plt.legend() +plt.tight_layout() +plt.savefig(OUT_PNG, dpi=200) + +print(f"Saved plot -> {OUT_PNG}") diff --git a/scripts/plot_logs_by_joint.py b/scripts/plot_logs_by_joint.py new file mode 100644 index 00000000..a2952bf2 --- /dev/null +++ b/scripts/plot_logs_by_joint.py @@ -0,0 +1,244 @@ +import numpy as np +import matplotlib.pyplot as plt +import os +import re + +# config +LOG_FILE = "joint_logs.npz" +ENV_ID = 0 +SAVE_DIR = "plots_by_joint" + +# output folder +os.makedirs(SAVE_DIR, exist_ok=True) + +# joint limits +JOINT_LIMITS = { + r".*haa": [-0.2, 0.2], + r".*f_hfe": [-1.0, 0.6], + r".*h_hfe": [-1.5, 0.5], + r".*f_kfe": [-1.5, 0.1], + r".*h_kfe": [-0.2, 1.0], + r".*f_pfe": [-0.3, 3.0], + r".*h_pfe": [-1.2, 2.5], + r".*f_pastern_to_foot": [-0.3, 1.8], + r".*h_pastern_to_foot": [-0.3, 1.8], + r".*base_joint": [-0.2, 0.2], +} + + +def find_joint_limits(joint_name: str): + for pattern, limits in JOINT_LIMITS.items(): + if re.match(pattern, joint_name): + return limits + return None + + +data = np.load(LOG_FILE, allow_pickle=True) +steps = data["step"] +target_pos = data["target_pos"] +actual_pos = data["actual_pos"] +torques = data["torque"] +joint_names = data["joint_name"] + +num_envs, num_steps, num_joints = actual_pos.shape +print(f"Loaded log for {num_joints} joints, {num_steps} steps.") + +pattern = re.compile(r".*(rh|lh|rf|lf)_(haa|hfe|kfe|pfe|pastern_to_foot)$") + +joint_map = {} +base_joints = {} + +for idx, name in enumerate(joint_names): + match = pattern.match(name) + if match: + leg, joint_type = match.groups() + leg, joint_type = leg.lower(), joint_type.lower() + joint_map.setdefault(joint_type, {})[leg] = idx + else: + # any joint name not matching the joint pattern will get its own plot + base_joints[name] = idx + + +def plot_joint_type(joint_type, leg_to_idx): + # 4 rows (LF/RF pos+torque, LH/RH pos+torque) × 2 columns + fig, axs = plt.subplots(4, 2, figsize=(12, 12), constrained_layout=True) + axs = np.array(axs) + + leg_pos = { + "lf": (0, 0), + "rf": (0, 1), + "lh": (2, 0), + "rh": (2, 1), 
+ } + + for leg, (pos_row, col) in leg_pos.items(): + torque_row = pos_row + 1 + if leg not in leg_to_idx: + axs[pos_row, col].axis("off") + axs[torque_row, col].axis("off") + continue + + j = leg_to_idx[leg] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + last_valid = nonzero_idx[-1] + 1 if len(nonzero_idx) > 0 else len(steps) + + # position plot + pos_ax = axs[pos_row, col] + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + linestyle="--", + linewidth=1.3, + label="Target Pos", + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + linewidth=1.3, + label="Actual Pos", + ) + + # add joint limit lines + limits = find_joint_limits(joint_names[j]) + if limits is not None: + lo, hi = limits + pos_ax.axhline( + lo, + color="blue", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Lower Limit ({lo:.2f})", + ) + pos_ax.axhline( + hi, + color="red", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Upper Limit ({hi:.2f})", + ) + + ymin, ymax = pos_ax.get_ylim() + ymin = min(ymin, lo - 0.05) + ymax = max(ymax, hi + 0.05) + pos_ax.set_ylim([ymin, ymax]) + + pos_ax.set_title(f"{leg.upper()} - {joint_type.upper()} Position") + pos_ax.set_ylabel("Position (rad)") + pos_ax.grid(True, linestyle="--", alpha=0.4) + pos_ax.legend(fontsize=7) + + # torque plot + torque_ax = axs[torque_row, col] + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_title(f"{leg.upper()} - {joint_type.upper()} Torque") + torque_ax.set_ylabel("Torque (Nm)") + torque_ax.set_xlabel("Step") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + plt.suptitle(f"{joint_type.upper()} Joints (All Legs)", fontsize=16) + save_path = os.path.join(SAVE_DIR, f"{joint_type}.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {save_path}") + + +# plot function for base +def plot_base_joints(base_joints): + fig, axs = plt.subplots( + len(base_joints) * 2, + 1, + figsize=(10, 4 * len(base_joints)), + constrained_layout=True, + ) + + if len(base_joints) == 1: + axs = np.array([axs[0], axs[1]]) + + for i, (name, j) in enumerate(base_joints.items()): + pos_ax = axs[2 * i] + torque_ax = axs[2 * i + 1] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + last_valid = nonzero_idx[-1] + 1 if len(nonzero_idx) > 0 else len(steps) + + # position + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + linestyle="--", + linewidth=1.3, + label="Target Pos", + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + linewidth=1.3, + label="Actual Pos", + ) + + # add joint limits + limits = find_joint_limits(name) + if limits is not None: + lo, hi = limits + pos_ax.axhline( + lo, + color="blue", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Lower Limit ({lo:.2f})", + ) + pos_ax.axhline( + hi, + color="red", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Upper Limit ({hi:.2f})", + ) + + ymin, ymax = pos_ax.get_ylim() + ymin = min(ymin, lo - 0.05) + ymax = max(ymax, hi + 0.05) + pos_ax.set_ylim([ymin, ymax]) + + pos_ax.set_title(f"{name} Position") + pos_ax.set_ylabel("Position (rad)") + pos_ax.legend(fontsize=7) + pos_ax.grid(True, linestyle="--", alpha=0.4) + + # torque + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + 
torque_ax.set_title(f"{name} Torque") + torque_ax.set_ylabel("Torque (Nm)") + torque_ax.set_xlabel("Step") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + plt.suptitle("Base Joints", fontsize=16) + save_path = os.path.join(SAVE_DIR, "base_joints.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {save_path}") + + +# generate plots +for joint_type, leg_to_idx in joint_map.items(): + plot_joint_type(joint_type, leg_to_idx) + +if base_joints: + plot_base_joints(base_joints) diff --git a/scripts/plot_logs_by_leg.py b/scripts/plot_logs_by_leg.py new file mode 100644 index 00000000..a12351cc --- /dev/null +++ b/scripts/plot_logs_by_leg.py @@ -0,0 +1,97 @@ +import numpy as np +import matplotlib.pyplot as plt +import os + +# config +LOG_FILE = "joint_logs.npz" +ENV_ID = 0 +SAVE_DIR = "plots_by_leg" + +# output folder +os.makedirs(SAVE_DIR, exist_ok=True) + +data = np.load(LOG_FILE, allow_pickle=True) +steps = data["step"] +target_pos = data["target_pos"] +actual_pos = data["actual_pos"] +torques = data["torque"] +joint_names = data["joint_name"] + +num_envs, num_steps, num_joints = actual_pos.shape +print(f"Loaded log for {num_joints} joints, {num_steps} steps.") + +# define grouping +groups = { + "base": [0], + "right_hind": list(range(1, 6)), + "left_hind": list(range(6, 11)), + "right_front": list(range(11, 16)), + "left_front": list(range(16, 21)), +} + + +def plot_joint_group(group_name, joint_indices): + num_joints_group = len(joint_indices) + fig, axs = plt.subplots( + nrows=num_joints_group * 2, + ncols=1, + figsize=(10, 3 * num_joints_group * 2), + constrained_layout=True, + ) + + if num_joints_group == 1: + axs = np.array([axs[0], axs[1]]) # ensure consistent 2D array shape + + for idx, j in enumerate(joint_indices): + pos_ax = axs[2 * idx] + torque_ax = axs[2 * idx + 1] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + if len(nonzero_idx) > 0: + last_valid = nonzero_idx[-1] + 1 + else: + last_valid = len(steps) + + # position + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + label="Target Pos", + linestyle="--", + linewidth=1.5, + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + label="Actual Pos", + linewidth=1.5, + ) + pos_ax.set_ylabel(f"{joint_names[j]} Pos (rad)") + pos_ax.legend(loc="upper right", fontsize=8) + pos_ax.grid(True, linestyle="--", alpha=0.4) + + # torque + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_ylabel(f"{joint_names[j]} Torque (Nm)") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + axs[-1].set_xlabel("Step") + plt.suptitle(f"{group_name.capitalize()} Joint Logs", fontsize=14) + save_path = os.path.join(SAVE_DIR, f"{group_name}.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {group_name}.png") + + +# generate plots +for group_name, joint_indices in groups.items(): + if max(joint_indices) < num_joints: + plot_joint_group(group_name, joint_indices) + else: + print(f"Skipping {group_name}: exceeds joint index range.") diff --git a/scripts/sweep_configs/sweep_horse_osc_config.json b/scripts/sweep_configs/sweep_horse_osc_config.json new file mode 100644 index 00000000..df27bc5a --- /dev/null +++ b/scripts/sweep_configs/sweep_horse_osc_config.json @@ -0,0 +1,58 @@ +{ + "method": "random", + "metric": { + "name": "rewards/total_rewards", + "goal": "maximize" + }, + "run_cap": 50, + "parameters": { 
+ "train_cfg.runner.max_iterations": { + "values": [500] + }, + "env_cfg.scaling.dof_pos": { + "values": [ + [0.2, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1], + [0.24, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52], + [0.288, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024], + [0.3456, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288], + [0.41472, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536], + [0.497664, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472] + ] + }, + + "env_cfg.scaling.dof_pos_target": { + "values": [ + [0.4, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2], + [0.48, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04], + [0.576, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048], + [0.6912, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576], + [0.82944, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008], + [0.995328, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976] + ] + }, + + "env_cfg.control.stiffness": { + "values": [ + {"haa": 4000, "hfe": 4000, "kfe": 4000, "pfe": 4000, "pastern_to_foot": 4000, "base_joint": 4000}, + {"haa": 3000, "hfe": 3000, "kfe": 3000, "pfe": 3000, "pastern_to_foot": 3000, "base_joint": 3000}, + {"haa": 2000, "hfe": 2000, "kfe": 2000, "pfe": 2000, "pastern_to_foot": 2000, "base_joint": 2000}, + {"haa": 1000, "hfe": 1000, "kfe": 1000, "pfe": 1000, "pastern_to_foot": 1000, "base_joint": 1000}, + {"haa": 500, "hfe": 500, "kfe": 500, "pfe": 500, "pastern_to_foot": 500, "base_joint": 500}, + {"haa": 250, "hfe": 250, "kfe": 250, "pfe": 250, "pastern_to_foot": 250, "base_joint": 250}, + {"haa": 100, "hfe": 100, "kfe": 100, "pfe": 100, "pastern_to_foot": 100, "base_joint": 100}, + {"haa": 50, "hfe": 50, "kfe": 50, "pfe": 50, "pastern_to_foot": 50, "base_joint": 50} + ] + }, + + "env_cfg.control.damping": { + "values": [ + {"haa": 400, "hfe": 400, "kfe": 400, "pfe": 400, "pastern_to_foot": 400, "base_joint": 400}, + {"haa": 300, "hfe": 300, "kfe": 300, "pfe": 300, "pastern_to_foot": 300, "base_joint": 300}, + {"haa": 250, "hfe": 250, "kfe": 250, "pfe": 250, "pastern_to_foot": 250, "base_joint": 250}, + {"haa": 200, "hfe": 200, "kfe": 200, "pfe": 200, "pastern_to_foot": 200, "base_joint": 200}, + {"haa": 100, "hfe": 100, "kfe": 100, "pfe": 100, "pastern_to_foot": 100, "base_joint": 100}, + {"haa": 50, "hfe": 50, "kfe": 50, "pfe": 50, "pastern_to_foot": 50, "base_joint": 50} + ] + } + } +}