From 68aac8e09c2da66b877a28b01ec066cc8867c501 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Fri, 5 Sep 2025 20:09:55 -0700 Subject: [PATCH 01/35] add height training --- gym/envs/base/legged_robot.py | 3 ++- gym/envs/mini_cheetah/mini_cheetah.py | 18 ++++++++++++++++++ gym/envs/mini_cheetah/mini_cheetah_config.py | 6 ++++-- gym/utils/interfaces/KeyboardInterface.py | 19 +++++++++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index e02507d5..80d4b453 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -517,8 +517,9 @@ def _init_buffers(self): dtype=torch.float, device=self.device, ) + # add height as the 4th command self.commands = torch.zeros( - self.num_envs, 3, dtype=torch.float, device=self.device + self.num_envs, 4, dtype=torch.float, device=self.device ) self.base_lin_vel = quat_rotate_inverse( self.base_quat, self.root_states[:, 7:10] diff --git a/gym/envs/mini_cheetah/mini_cheetah.py b/gym/envs/mini_cheetah/mini_cheetah.py index bea90140..23738608 100644 --- a/gym/envs/mini_cheetah/mini_cheetah.py +++ b/gym/envs/mini_cheetah/mini_cheetah.py @@ -1,5 +1,6 @@ import torch +from isaacgym.torch_utils import torch_rand_float from gym.envs.base.legged_robot import LeggedRobot @@ -51,3 +52,20 @@ def _reward_dof_near_home(self): ), dim=1, ) + + def _resample_commands(self, env_ids): + super()._resample_commands(env_ids) + + # resample height + height_range = self.command_ranges["height"] + self.commands[env_ids, 3] = torch_rand_float( + height_range[0], height_range[1], (len(env_ids), 1), device=self.device + ).squeeze(1) + + def _reward_tracking_height(self): + """Reward for base height.""" + # error between current and commanded height + error = self.base_height.flatten() - self.commands[:, 3].flatten() + error /= self.scales["base_height"] + + return self._sqrdexp(error) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 157c6031..8a39a562 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -72,6 +72,7 @@ class ranges: lin_vel_x = [-2.0, 3.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] yaw_vel = 3 # max [rad/s] + height = [0.15, 0.35] # m class push_robots: toggle = True @@ -119,7 +120,7 @@ class scaling(LeggedRobotCfg.scaling): dof_pos_obs = dof_pos dof_pos_target = 4 * [0.2, 0.3, 0.3] tau_ff = 4 * [18, 18, 28] - commands = [3, 1, 3] + commands = [3, 1, 3, 1] # add height as a command class MiniCheetahRunnerCfg(LeggedRobotRunnerCfg): @@ -178,13 +179,14 @@ class weights: orientation = 1.0 torques = 5.0e-7 dof_vel = 0.0 - min_base_height = 1.5 + # min_base_height = 1.5 action_rate = 0.01 action_rate2 = 0.001 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 dof_near_home = 0.0 + tracking_height = 0.5 class termination_weight: termination = 0.01 diff --git a/gym/utils/interfaces/KeyboardInterface.py b/gym/utils/interfaces/KeyboardInterface.py index b1d506b7..15a80d7f 100644 --- a/gym/utils/interfaces/KeyboardInterface.py +++ b/gym/utils/interfaces/KeyboardInterface.py @@ -15,6 +15,10 @@ def __init__(self, env): env.gym.subscribe_viewer_keyboard_event( env.viewer, gymapi.KEY_SPACE, "space_shoot" ) + env.gym.subscribe_viewer_keyboard_event(env.viewer, gymapi.KEY_UP, "height_up") + env.gym.subscribe_viewer_keyboard_event( + env.viewer, gymapi.KEY_DOWN, "height_down" + ) env.gym.subscribe_viewer_mouse_event( env.viewer, gymapi.MOUSE_LEFT_BUTTON, "mouse_shoot" ) @@ -24,6 +28,7 @@ def __init__(self, env): print("WASD: forward, strafe left, " "backward, strafe right") print("QE: yaw left/right") print("R: reset environments") + print("Arrow Up/Down: stand up/lay down") print("ESC: quit") print("______________________________________________________________") @@ -39,6 +44,10 @@ def __init__(self, env): self.max_vel_yaw = 2.0 self.increment_yaw = self.max_vel_yaw * 0.2 + self.min_height = 0.1 + self.max_height = 0.4 + self.increment_height = 0.1 + def update(self, env): for evt in env.gym.query_viewer_action_events(env.viewer): if evt.value == 0: @@ -73,6 +82,16 @@ def update(self, env): env.commands[:, 2] + self.increment_yaw, max=self.max_vel_yaw, ) + elif evt.action == "height_up": + env.commands[:, 3] = torch.clamp( + env.commands[:, 3] + self.increment_height, + max=self.max_height, + ) + elif evt.action == "height_down": + env.commands[:, 3] = torch.clamp( + env.commands[:, 3] - self.increment_height, + max=self.min_height, + ) elif evt.action == "QUIT": env.exit = True elif evt.action == "RESET": From 90f32713a4cc944df046ac01540c04bba8579811 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Thu, 23 Oct 2025 00:35:54 -0700 Subject: [PATCH 02/35] mini_cheetah with horse mass --- gym/envs/mini_cheetah/mini_cheetah_config.py | 4 +- .../urdf/mini_cheetah_simple.urdf | 116 +++++++++--------- 2 files changed, 59 insertions(+), 61 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 8a39a562..1782e9bc 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -59,8 +59,8 @@ class init_state(LeggedRobotCfg.init_state): class control(LeggedRobotCfg.control): # * PD Drive parameters: - stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0} - damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} + stiffness = {"haa": 182.0, "hfe": 192.0, "kfe": 600.0} + damping = {"haa": 1.51, "hfe": 1.55, "kfe": 2.74} ctrl_frequency = 100 desired_sim_frequency = 500 diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index e5836117..d487d19b 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -1,11 +1,12 @@ - + - + - + + @@ -24,14 +25,15 @@ - + - + - + + @@ -39,10 +41,6 @@ - @@ -51,14 +49,15 @@ - + - + - + + @@ -80,14 +79,15 @@ - + - + - + + @@ -117,7 +117,8 @@ - + + @@ -129,23 +130,20 @@ - + - + - + + - @@ -154,14 +152,15 @@ - + - + - + + @@ -181,14 +180,15 @@ - + - + - + + @@ -218,7 +218,7 @@ - + @@ -230,14 +230,15 @@ - + - + - + + @@ -245,10 +246,6 @@ - @@ -257,14 +254,15 @@ - + - + - + + @@ -286,14 +284,15 @@ - + - + - + + @@ -323,7 +322,7 @@ - + @@ -335,23 +334,20 @@ - + - + - + + - @@ -360,14 +356,15 @@ - + - + - + + @@ -389,14 +386,15 @@ - + - + - + + @@ -426,7 +424,7 @@ - + From b4ed45cbc9d571328208b7ca3d1779d466286989 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Fri, 24 Oct 2025 00:54:13 -0700 Subject: [PATCH 03/35] scale inertia --- .../urdf/mini_cheetah_simple.urdf | 106 +++++------------- 1 file changed, 29 insertions(+), 77 deletions(-) diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index d487d19b..01d08244 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -5,8 +5,7 @@ - - + @@ -25,15 +24,14 @@ - + - - + @@ -49,15 +47,14 @@ - + - - + @@ -79,15 +76,14 @@ - + - - + @@ -118,8 +114,7 @@ - - + @@ -130,15 +125,14 @@ - + - - + @@ -152,15 +146,14 @@ - + - - + @@ -168,10 +161,6 @@ - - - - @@ -180,15 +169,14 @@ - + - - + @@ -196,12 +184,6 @@ - - - - - - @@ -219,7 +201,7 @@ - + @@ -230,15 +212,14 @@ - + - - + @@ -254,15 +235,14 @@ - + - - + @@ -270,12 +250,6 @@ - - - - - - @@ -284,15 +258,14 @@ - + - - + @@ -300,12 +273,6 @@ - - - - - - @@ -323,7 +290,7 @@ - + @@ -334,15 +301,14 @@ - + - - + @@ -356,15 +322,14 @@ - + - - + @@ -372,12 +337,6 @@ - - - - - - @@ -386,15 +345,14 @@ - + - - + @@ -402,12 +360,6 @@ - - - - - - @@ -425,7 +377,7 @@ - + From 1582c754aafcd92ccf96f538cfb769c250012fcd Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Tue, 28 Oct 2025 22:59:21 -0700 Subject: [PATCH 04/35] baseline - stable cheetah --- gym/envs/mini_cheetah/mini_cheetah_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 1782e9bc..0b270f5c 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -10,7 +10,7 @@ class MiniCheetahCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 2**12 num_actuators = 12 - episode_length_s = 4 + episode_length_s = 6 class terrain(LeggedRobotCfg.terrain): mesh_type = "plane" @@ -59,8 +59,8 @@ class init_state(LeggedRobotCfg.init_state): class control(LeggedRobotCfg.control): # * PD Drive parameters: - stiffness = {"haa": 182.0, "hfe": 192.0, "kfe": 600.0} - damping = {"haa": 1.51, "hfe": 1.55, "kfe": 2.74} + stiffness = {"haa": 379.193, "hfe": 406.701, "kfe": 1864.340} + damping = {"haa": 9.48, "hfe": 10.168, "kfe": 46.608} ctrl_frequency = 100 desired_sim_frequency = 500 @@ -100,7 +100,7 @@ class asset(LeggedRobotCfg.asset): flip_visual_attachments = False disable_gravity = False disable_motors = False - joint_damping = 0.1 + joint_damping = 0.3 rotor_inertia = [0.002268, 0.002268, 0.005484] * 4 class reward_settings(LeggedRobotCfg.reward_settings): From afb939c5a53891942ee6267031cd3a5cd2dce9fb Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Fri, 31 Oct 2025 01:25:14 -0700 Subject: [PATCH 05/35] baseline - split body --- gym/envs/mini_cheetah/mini_cheetah_config.py | 26 +++++++---- .../urdf/mini_cheetah_simple.urdf | 43 ++++++++++++++----- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 0b270f5c..8e918da5 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -9,7 +9,7 @@ class MiniCheetahCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 2**12 - num_actuators = 12 + num_actuators = 13 episode_length_s = 6 class terrain(LeggedRobotCfg.terrain): @@ -20,6 +20,7 @@ class init_state(LeggedRobotCfg.init_state): "haa": 0.0, "hfe": -0.785398, "kfe": 1.596976, + "base_joint": 0.0, } # * reset setup chooses how the initial conditions are chosen. @@ -38,8 +39,15 @@ class init_state(LeggedRobotCfg.init_state): "haa": [-0.01, 0.01], "hfe": [-0.785398, -0.785398], "kfe": [1.596976, 1.596976], + "base_joint": [-0.01, 0.01], } - dof_vel_range = {"haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0]} + dof_vel_range = { + "haa": [0.0, 0.0], + "hfe": [0.0, 0.0], + "kfe": [0.0, 0.0], + "base_joint": [0.0, 0.0], + } + root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y @@ -59,8 +67,8 @@ class init_state(LeggedRobotCfg.init_state): class control(LeggedRobotCfg.control): # * PD Drive parameters: - stiffness = {"haa": 379.193, "hfe": 406.701, "kfe": 1864.340} - damping = {"haa": 9.48, "hfe": 10.168, "kfe": 46.608} + stiffness = {"haa": 379.193, "hfe": 406.701, "kfe": 1864.340, "base_joint": 500} + damping = {"haa": 9.48, "hfe": 10.168, "kfe": 46.608, "base_joint": 9.48} ctrl_frequency = 100 desired_sim_frequency = 500 @@ -101,7 +109,7 @@ class asset(LeggedRobotCfg.asset): disable_gravity = False disable_motors = False joint_damping = 0.3 - rotor_inertia = [0.002268, 0.002268, 0.005484] * 4 + rotor_inertia = [0.002268] + ([0.002268, 0.002268, 0.005484] * 4) class reward_settings(LeggedRobotCfg.reward_settings): soft_dof_pos_limit = 0.9 @@ -114,12 +122,12 @@ class reward_settings(LeggedRobotCfg.reward_settings): class scaling(LeggedRobotCfg.scaling): base_ang_vel = 0.3 base_lin_vel = BASE_HEIGHT_REF - dof_vel = 4 * [2.0, 2.0, 4.0] + dof_vel = [2.0] + (4 * [2.0, 2.0, 4.0]) base_height = 0.3 - dof_pos = 4 * [0.2, 0.3, 0.3] + dof_pos = [0.2] + (4 * [0.2, 0.3, 0.3]) dof_pos_obs = dof_pos - dof_pos_target = 4 * [0.2, 0.3, 0.3] - tau_ff = 4 * [18, 18, 28] + dof_pos_target = [0.2] + (4 * [0.2, 0.3, 0.3]) + tau_ff = [18] + (4 * [18, 18, 28]) commands = [3, 1, 3, 1] # add height as a command diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 01d08244..d0e83f61 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -1,28 +1,49 @@ - + - + - - + + - + + + + + + + + + + + + + + + + + + + + + + - + @@ -123,7 +144,7 @@ - + @@ -209,8 +230,8 @@ - - + + @@ -298,8 +319,8 @@ - - + + From fb34dd5fb7f2a15fb2ed23fe126037d91b6ad203 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Wed, 5 Nov 2025 01:21:11 -0800 Subject: [PATCH 06/35] mini_cheetah with horse values --- gym/envs/mini_cheetah/mini_cheetah_config.py | 50 ++- gym/utils/interfaces/KeyboardInterface.py | 4 +- .../urdf/mini_cheetah_simple.urdf | 328 ++++++++++-------- 3 files changed, 221 insertions(+), 161 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 64c48da0..bdc20bc1 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -3,14 +3,14 @@ LeggedRobotRunnerCfg, ) -BASE_HEIGHT_REF = 0.3 +BASE_HEIGHT_REF = 1.6 class MiniCheetahCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 2**12 - num_actuators = 13 - episode_length_s = 10 + num_actuators = 21 + episode_length_s = 12 class terrain(LeggedRobotCfg.terrain): mesh_type = "plane" @@ -20,6 +20,8 @@ class init_state(LeggedRobotCfg.init_state): "haa": 0.0, "hfe": -0.785398, "kfe": 1.596976, + "pfe": 0.0, + "pastern_to_foot": 0.0, "base_joint": 0.0, } @@ -29,7 +31,7 @@ class init_state(LeggedRobotCfg.init_state): reset_mode = "reset_to_range" # * default COM for basic initialization - pos = [0.0, 0.0, 0.35] # x,y,z [m] + pos = [0.0, 0.0, 1.6] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] @@ -39,19 +41,23 @@ class init_state(LeggedRobotCfg.init_state): "haa": [-0.01, 0.01], "hfe": [-0.785398, -0.785398], "kfe": [1.596976, 1.596976], + "pfe": [0.0, 0.0], + "pastern_to_foot": [0.0, 0.00], "base_joint": [-0.01, 0.01], } dof_vel_range = { "haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0], + "pfe": [0.0, 0.0], + "pastern_to_foot": [0.0, 0.00], "base_joint": [0.0, 0.0], } root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [0.35, 0.35], # z + [1.6, 1.6], # z [0.0, 0.0], # roll [0.0, 0.0], # pitch [0.0, 0.0], # yaw @@ -67,8 +73,22 @@ class init_state(LeggedRobotCfg.init_state): class control(LeggedRobotCfg.control): # * PD Drive parameters: - stiffness = {"haa": 379.193, "hfe": 406.701, "kfe": 1864.340, "base_joint": 500} - damping = {"haa": 9.48, "hfe": 10.168, "kfe": 46.608, "base_joint": 9.48} + stiffness = { + "haa": 379.193, + "hfe": 406.701, + "kfe": 1864.340, + "pfe": 1864.340, + "pastern_to_foot": 1864.340, + "base_joint": 2500, + } + damping = { + "haa": 9.48, + "hfe": 10.168, + "kfe": 46.608, + "pfe": 1864.340, + "pastern_to_foot": 1864.340, + "base_joint": 9.48, + } ctrl_frequency = 100 desired_sim_frequency = 500 @@ -80,7 +100,7 @@ class ranges: lin_vel_x = [-2.0, 3.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] yaw_vel = 3 # max [rad/s] - height = [0.15, 0.35] # m + height = [0.61, 1.60] # m class push_robots: toggle = False @@ -109,7 +129,9 @@ class asset(LeggedRobotCfg.asset): disable_gravity = False disable_motors = False joint_damping = 0.3 - rotor_inertia = [0.002268] + ([0.002268, 0.002268, 0.005484] * 4) + rotor_inertia = [0.002268] + ( + [0.002268, 0.002268, 0.005484, 0.005484, 0.005484] * 4 + ) class reward_settings(LeggedRobotCfg.reward_settings): soft_dof_pos_limit = 0.9 @@ -122,12 +144,12 @@ class reward_settings(LeggedRobotCfg.reward_settings): class scaling(LeggedRobotCfg.scaling): base_ang_vel = 0.3 base_lin_vel = BASE_HEIGHT_REF - dof_vel = [2.0] + (4 * [2.0, 2.0, 4.0]) + dof_vel = [2.0] + (4 * [2.0, 2.0, 4.0, 4.0, 4.0]) base_height = 0.3 - dof_pos = [0.2] + (4 * [0.2, 0.3, 0.3]) + dof_pos = [0.2] + (4 * [0.2, 0.3, 0.3, 0.3, 0.3]) dof_pos_obs = dof_pos - dof_pos_target = [0.2] + (4 * [0.2, 0.3, 0.3]) - tau_ff = [18] + (4 * [18, 18, 28]) + dof_pos_target = [0.2] + (4 * [0.2, 0.3, 0.3, 0.3, 0.3]) + tau_ff = [18] + (4 * [18, 18, 28, 28, 28]) commands = [3, 1, 3, 1] # add height as a command @@ -194,7 +216,7 @@ class weights: dof_pos_limits = 0.0 feet_contact_forces = 0.0 dof_near_home = 0.0 - tracking_height = 0.5 + tracking_height = 1.0 class termination_weight: termination = 0.01 diff --git a/gym/utils/interfaces/KeyboardInterface.py b/gym/utils/interfaces/KeyboardInterface.py index 3c750bed..d9504813 100644 --- a/gym/utils/interfaces/KeyboardInterface.py +++ b/gym/utils/interfaces/KeyboardInterface.py @@ -44,8 +44,8 @@ def __init__(self, env): self.max_vel_yaw = 2.0 self.increment_yaw = self.max_vel_yaw * 0.2 - self.min_height = 0.1 - self.max_height = 0.4 + self.min_height = 0.6 + self.max_height = 1.6 self.increment_height = 0.1 def update(self, env): diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index d0e83f61..6c83a001 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -7,13 +7,19 @@ - - - + + + + + + + - - + + + + @@ -23,29 +29,36 @@ - - + + + + + + + - - + + + + - - + + - + - + @@ -54,21 +67,16 @@ - - - - - - + - + - + @@ -77,27 +85,17 @@ - - - - - - - - - - - - + + - + - + @@ -106,32 +104,41 @@ - - - - - - - - - - - - + + - - + + + + + + + + + + + + + + + + + + + + + + - + + - - - - + + @@ -143,10 +150,10 @@ - + - + @@ -155,19 +162,16 @@ - - - - + - - + + - + @@ -176,49 +180,69 @@ - - - - - - + + - + + + + + + + + + + - - - - - - + + - - + + + + + + + + + + + + + + + + + + + + + + - + + - - - - + + @@ -230,10 +254,10 @@ - + - + @@ -242,21 +266,16 @@ - - - - - - + - + - + @@ -265,21 +284,17 @@ - - - - - - + + - + - + @@ -288,28 +303,43 @@ - - - - - - + + - - + + + + + + + + + + + + + + + + + + + + + + - + + - - - - + + - + @@ -319,10 +349,10 @@ - + - + @@ -331,19 +361,16 @@ - - - - + - - + + - + @@ -352,21 +379,17 @@ - - - - - - + + - + - + @@ -375,26 +398,41 @@ - - - - - - + + - - + + + + + + + + + + + + + + + + + + + + + + - + + - - - - + + From f9fa9d3e1b3b7ea99ea022c0d78a0c9974bac402 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Wed, 5 Nov 2025 01:27:17 -0800 Subject: [PATCH 07/35] urdf-debugger fixed --- .../urdf/mini_cheetah_simple.urdf | 446 +++++++++--------- 1 file changed, 223 insertions(+), 223 deletions(-) diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 6c83a001..7dd6c240 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -1,443 +1,443 @@ - + - + - - - + + + - + - + - + - + - + - - - + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - - - + + + - + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - - + + + - + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - - + + + - + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - - + + + - + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - + + - - - + + + - + \ No newline at end of file From af0534bcae565868d41c29bef9eea1e0c317d0dc Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Fri, 7 Nov 2025 01:26:37 -0800 Subject: [PATCH 08/35] stable horse but can't stand --- gym/envs/mini_cheetah/mini_cheetah_config.py | 63 +-- gym/utils/interfaces/KeyboardInterface.py | 2 +- .../urdf/mini_cheetah_simple.urdf | 449 +++++++++--------- 3 files changed, 253 insertions(+), 261 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index bdc20bc1..b44080c9 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -3,14 +3,14 @@ LeggedRobotRunnerCfg, ) -BASE_HEIGHT_REF = 1.6 +BASE_HEIGHT_REF = 1.3 class MiniCheetahCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 2**12 - num_actuators = 21 - episode_length_s = 12 + num_actuators = 1 + 4 * 5 + episode_length_s = 20 class terrain(LeggedRobotCfg.terrain): mesh_type = "plane" @@ -18,8 +18,8 @@ class terrain(LeggedRobotCfg.terrain): class init_state(LeggedRobotCfg.init_state): default_joint_angles = { "haa": 0.0, - "hfe": -0.785398, - "kfe": 1.596976, + "hfe": 0.0, + "kfe": 0.0, "pfe": 0.0, "pastern_to_foot": 0.0, "base_joint": 0.0, @@ -31,33 +31,33 @@ class init_state(LeggedRobotCfg.init_state): reset_mode = "reset_to_range" # * default COM for basic initialization - pos = [0.0, 0.0, 1.6] # x,y,z [m] + pos = [0.0, 0.0, 1.3] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] # * initialization for random range setup dof_pos_range = { - "haa": [-0.01, 0.01], - "hfe": [-0.785398, -0.785398], - "kfe": [1.596976, 1.596976], + "haa": [-0.2, 0.2], + "hfe": [-0.5, 0.5], + "kfe": [0.0, 0.0], "pfe": [0.0, 0.0], - "pastern_to_foot": [0.0, 0.00], - "base_joint": [-0.01, 0.01], + "pastern_to_foot": [0.0, 0.0], + "base_joint": [0.0, 0.0], } dof_vel_range = { "haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0], "pfe": [0.0, 0.0], - "pastern_to_foot": [0.0, 0.00], + "pastern_to_foot": [0.0, 0.0], "base_joint": [0.0, 0.0], } root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [1.6, 1.6], # z + [1.3, 1.3], # z [0.0, 0.0], # roll [0.0, 0.0], # pitch [0.0, 0.0], # yaw @@ -74,22 +74,22 @@ class init_state(LeggedRobotCfg.init_state): class control(LeggedRobotCfg.control): # * PD Drive parameters: stiffness = { - "haa": 379.193, - "hfe": 406.701, - "kfe": 1864.340, - "pfe": 1864.340, - "pastern_to_foot": 1864.340, + "haa": 800, + "hfe": 800, + "kfe": 4000, + "pfe": 4000, + "pastern_to_foot": 4000, "base_joint": 2500, } damping = { "haa": 9.48, - "hfe": 10.168, - "kfe": 46.608, - "pfe": 1864.340, - "pastern_to_foot": 1864.340, + "hfe": 50, + "kfe": 250, + "pfe": 3000, + "pastern_to_foot": 1000, "base_joint": 9.48, } - ctrl_frequency = 100 + ctrl_frequency = 250 desired_sim_frequency = 500 class commands: @@ -100,7 +100,7 @@ class ranges: lin_vel_x = [-2.0, 3.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] yaw_vel = 3 # max [rad/s] - height = [0.61, 1.60] # m + height = [0.61, 1.30] # m class push_robots: toggle = False @@ -110,7 +110,7 @@ class push_robots: class domain_rand: randomize_friction = True - friction_range = [0.5, 1.0] + friction_range = [0.5, 5.0] randomize_base_mass = False added_mass_range = [-1.0, 1.0] @@ -129,8 +129,9 @@ class asset(LeggedRobotCfg.asset): disable_gravity = False disable_motors = False joint_damping = 0.3 - rotor_inertia = [0.002268] + ( - [0.002268, 0.002268, 0.005484, 0.005484, 0.005484] * 4 + fix_base_link = False + rotor_inertia = [0.002268] + 4 * ( + [0.002268, 0.002268, 0.005484, 0.005484, 0.005484] ) class reward_settings(LeggedRobotCfg.reward_settings): @@ -144,12 +145,12 @@ class reward_settings(LeggedRobotCfg.reward_settings): class scaling(LeggedRobotCfg.scaling): base_ang_vel = 0.3 base_lin_vel = BASE_HEIGHT_REF - dof_vel = [2.0] + (4 * [2.0, 2.0, 4.0, 4.0, 4.0]) + dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) base_height = 0.3 dof_pos = [0.2] + (4 * [0.2, 0.3, 0.3, 0.3, 0.3]) dof_pos_obs = dof_pos - dof_pos_target = [0.2] + (4 * [0.2, 0.3, 0.3, 0.3, 0.3]) - tau_ff = [18] + (4 * [18, 18, 28, 28, 28]) + dof_pos_target = [0.2] + (4 * [0.2, 0.3, 0.2, 0.2, 0.2]) + tau_ff = [3600] + (4 * [3600, 3600, 400000, 400000, 5800]) commands = [3, 1, 3, 1] # add height as a command @@ -216,7 +217,7 @@ class weights: dof_pos_limits = 0.0 feet_contact_forces = 0.0 dof_near_home = 0.0 - tracking_height = 1.0 + tracking_height = 1.5 class termination_weight: termination = 0.01 diff --git a/gym/utils/interfaces/KeyboardInterface.py b/gym/utils/interfaces/KeyboardInterface.py index d9504813..9607c835 100644 --- a/gym/utils/interfaces/KeyboardInterface.py +++ b/gym/utils/interfaces/KeyboardInterface.py @@ -45,7 +45,7 @@ def __init__(self, env): self.increment_yaw = self.max_vel_yaw * 0.2 self.min_height = 0.6 - self.max_height = 1.6 + self.max_height = 1.3 self.increment_height = 0.1 def update(self, env): diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 7dd6c240..8f7d7cb7 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -1,442 +1,433 @@ - + - + - - - + + + - + - + - + - + - + - - - + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - - - + + + - + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - - + + + - - + + - + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - - + + + - + - - - - - + + + + + - - - + + + - - + + - - - - - - - - - - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - - + + + - + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - + + - - - + + + - - - - - + + + + + - - - + + + - + - - - - - + + + + + - - - + + + - - + + - - - - - + + + + + - - - + + + - - + + - + - - - - - + + + + + - - + + - - - + + + - + - - - - - + + + + + - - + + - - - + + + From a8801399503ced4f767ccc598bbe9e9a194f0a7d Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Sat, 8 Nov 2025 00:52:46 -0800 Subject: [PATCH 09/35] realistic effort values --- .../urdf/mini_cheetah_simple.urdf | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 8f7d7cb7..ad56648c 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -58,7 +58,7 @@ - + @@ -76,7 +76,7 @@ - + @@ -95,7 +95,7 @@ - + @@ -114,7 +114,7 @@ - + @@ -133,7 +133,7 @@ - + @@ -153,7 +153,7 @@ - + @@ -171,7 +171,7 @@ - + @@ -190,7 +190,7 @@ - + @@ -209,7 +209,7 @@ - + @@ -228,7 +228,7 @@ - + @@ -248,7 +248,7 @@ - + @@ -266,7 +266,7 @@ - + @@ -285,7 +285,7 @@ - + @@ -304,7 +304,7 @@ - + @@ -323,7 +323,7 @@ - + @@ -343,7 +343,7 @@ - + @@ -361,7 +361,7 @@ - + @@ -380,7 +380,7 @@ - + @@ -399,7 +399,7 @@ - + @@ -418,7 +418,7 @@ - + From c456a0c0906f02f93afc1559277f8a9132dce040 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Mon, 10 Nov 2025 00:24:19 -0800 Subject: [PATCH 10/35] better results --- gym/envs/base/legged_robot.py | 51 +++++++++++++++++++ gym/envs/mini_cheetah/mini_cheetah_config.py | 30 ++++++----- .../urdf/mini_cheetah_simple.urdf | 16 +++--- 3 files changed, 75 insertions(+), 22 deletions(-) diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index 80d4b453..cf9184b3 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -9,6 +9,8 @@ quat_from_euler_xyz, ) from isaacgym import gymtorch, gymapi +import matplotlib.pyplot as plt +import wandb from gym import LEGGED_GYM_ROOT_DIR from gym.envs.base.base_task import BaseTask @@ -39,6 +41,12 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): self.device = sim_device # todo CRIME: remove this from __init__ then self._parse_cfg(self.cfg) super().__init__(gym, sim, self.cfg, sim_params, sim_device, headless) + self.joint_log_table = wandb.Table( + columns=["step", "joint_name", "target_pos", "actual_pos", "torque"] + ) + self.com_table = wandb.Table( + columns=["step", "env_id", "COM_x", "COM_y", "COM_z"] + ) if not self.headless: self._set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat) @@ -52,12 +60,33 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def step(self): self._reset_buffers() self._pre_decimation_step() + self.com_traj = [] # * step physics and render each frame self._render() for _ in range(self.cfg.control.decimation): self._pre_compute_torques() self.torques = self._compute_torques() self._post_compute_torques() + + # logging for wandb + if self.common_step_counter % 100 == 0: # log every 100th step + env_id = 0 + joint_data = {} + for j in range(self.dof_pos.shape[1]): + joint_data[f"joint_{j}/target_pos"] = float( + self.dof_pos_target[env_id, j] + ) + joint_data[f"joint_{j}/actual_pos"] = float(self.dof_pos[env_id, j]) + joint_data[f"joint_{j}/torque"] = float(self.torques[env_id, j]) + + # Append current COM position + com_pos = self.root_states[env_id, :3].cpu().numpy() + self.com_traj.append(com_pos) + + if wandb.run is not None: + wandb.log(joint_data) + # wandb.log({"COM_point_cloud": wandb.Object3D(com_point_cloud)}) + self._step_physx_sim() self._post_physx_step() @@ -67,6 +96,28 @@ def step(self): env_ids = self.to_be_reset.nonzero(as_tuple=False).flatten() self._reset_idx(env_ids) + # If episode ended, plot COM trajectory in 3D and log to W&B + if ( + hasattr(self, "episode_done") + and self.episode_done + and len(self.com_traj) > 0 + ): + com_array = np.array(self.com_traj) + fig = plt.figure() + ax = fig.add_subplot(111, projection="3d") + ax.scatter( + com_array[:, 0], com_array[:, 1], com_array[:, 2], c="blue", marker="o" + ) + ax.set_xlabel("X") + ax.set_ylabel("Y") + ax.set_zlabel("Z") + ax.set_title("COM Trajectory") + wandb.log({"COM_trajectory_plot": wandb.Image(fig)}) + plt.close(fig) + + # Reset trajectory for next episode + self.com_traj = [] + def _pre_decimation_step(self): return None diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index b44080c9..0125d169 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -74,23 +74,23 @@ class init_state(LeggedRobotCfg.init_state): class control(LeggedRobotCfg.control): # * PD Drive parameters: stiffness = { - "haa": 800, - "hfe": 800, + "haa": 4000, + "hfe": 4000, "kfe": 4000, "pfe": 4000, "pastern_to_foot": 4000, - "base_joint": 2500, + "base_joint": 50, } damping = { - "haa": 9.48, - "hfe": 50, + "haa": 250, + "hfe": 250, "kfe": 250, - "pfe": 3000, - "pastern_to_foot": 1000, - "base_joint": 9.48, + "pfe": 250, + "pastern_to_foot": 250, + "base_joint": 10, } - ctrl_frequency = 250 - desired_sim_frequency = 500 + ctrl_frequency = 500 # how often the PDF controller/action updates run + desired_sim_frequency = 1000 # how often the physics is calculated class commands: # * time before command are changed[s] @@ -146,11 +146,13 @@ class scaling(LeggedRobotCfg.scaling): base_ang_vel = 0.3 base_lin_vel = BASE_HEIGHT_REF dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) - base_height = 0.3 - dof_pos = [0.2] + (4 * [0.2, 0.3, 0.3, 0.3, 0.3]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + ( + 4 * [1.0, 1.0, 1.0, 1.0, 1.0] + ) # i think these values were too small? dof_pos_obs = dof_pos - dof_pos_target = [0.2] + (4 * [0.2, 0.3, 0.2, 0.2, 0.2]) - tau_ff = [3600] + (4 * [3600, 3600, 400000, 400000, 5800]) + dof_pos_target = [0.2] + (4 * [0.1, 0.1, 0.1, 0.1, 0.1]) # target joint angles + tau_ff = [3600] + (4 * [3600, 3600, 400000, 400000, 5800]) # not being used commands = [3, 1, 3, 1] # add height as a command diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index ad56648c..264cd721 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -58,7 +58,7 @@ - + @@ -95,7 +95,7 @@ - + @@ -153,7 +153,7 @@ - + @@ -190,7 +190,7 @@ - + @@ -248,7 +248,7 @@ - + @@ -285,7 +285,7 @@ - + @@ -343,7 +343,7 @@ - + @@ -380,7 +380,7 @@ - + From 5001e40b89edb4013d0fa8762433d84a30766a66 Mon Sep 17 00:00:00 2001 From: sheim Date: Tue, 11 Nov 2025 09:33:16 -0500 Subject: [PATCH 11/35] manual merge of non-logging part of yl/horse_RL --- gym/envs/mini_cheetah/mini_cheetah_config.py | 30 +++++++------- .../urdf/mini_cheetah_simple.urdf | 40 +++++++++---------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index b44080c9..0125d169 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -74,23 +74,23 @@ class init_state(LeggedRobotCfg.init_state): class control(LeggedRobotCfg.control): # * PD Drive parameters: stiffness = { - "haa": 800, - "hfe": 800, + "haa": 4000, + "hfe": 4000, "kfe": 4000, "pfe": 4000, "pastern_to_foot": 4000, - "base_joint": 2500, + "base_joint": 50, } damping = { - "haa": 9.48, - "hfe": 50, + "haa": 250, + "hfe": 250, "kfe": 250, - "pfe": 3000, - "pastern_to_foot": 1000, - "base_joint": 9.48, + "pfe": 250, + "pastern_to_foot": 250, + "base_joint": 10, } - ctrl_frequency = 250 - desired_sim_frequency = 500 + ctrl_frequency = 500 # how often the PDF controller/action updates run + desired_sim_frequency = 1000 # how often the physics is calculated class commands: # * time before command are changed[s] @@ -146,11 +146,13 @@ class scaling(LeggedRobotCfg.scaling): base_ang_vel = 0.3 base_lin_vel = BASE_HEIGHT_REF dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) - base_height = 0.3 - dof_pos = [0.2] + (4 * [0.2, 0.3, 0.3, 0.3, 0.3]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + ( + 4 * [1.0, 1.0, 1.0, 1.0, 1.0] + ) # i think these values were too small? dof_pos_obs = dof_pos - dof_pos_target = [0.2] + (4 * [0.2, 0.3, 0.2, 0.2, 0.2]) - tau_ff = [3600] + (4 * [3600, 3600, 400000, 400000, 5800]) + dof_pos_target = [0.2] + (4 * [0.1, 0.1, 0.1, 0.1, 0.1]) # target joint angles + tau_ff = [3600] + (4 * [3600, 3600, 400000, 400000, 5800]) # not being used commands = [3, 1, 3, 1] # add height as a command diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 8f7d7cb7..264cd721 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -58,7 +58,7 @@ - + @@ -76,7 +76,7 @@ - + @@ -95,7 +95,7 @@ - + @@ -114,7 +114,7 @@ - + @@ -133,7 +133,7 @@ - + @@ -153,7 +153,7 @@ - + @@ -171,7 +171,7 @@ - + @@ -190,7 +190,7 @@ - + @@ -209,7 +209,7 @@ - + @@ -228,7 +228,7 @@ - + @@ -248,7 +248,7 @@ - + @@ -266,7 +266,7 @@ - + @@ -285,7 +285,7 @@ - + @@ -304,7 +304,7 @@ - + @@ -323,7 +323,7 @@ - + @@ -343,7 +343,7 @@ - + @@ -361,7 +361,7 @@ - + @@ -380,7 +380,7 @@ - + @@ -399,7 +399,7 @@ - + @@ -418,7 +418,7 @@ - + From 12dc7a4f5b440a17076be941be26eccba6b4734d Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Tue, 11 Nov 2025 23:58:45 -0800 Subject: [PATCH 12/35] good SWE cleanup --- .gitignore | 1 + gym/envs/base/legged_robot.py | 51 ----------------------- scripts/play.py | 77 +++++++++++++++++++++++++++++------ scripts/plot_logs.py | 70 +++++++++++++++++++++++++++++++ 4 files changed, 136 insertions(+), 63 deletions(-) create mode 100644 scripts/plot_logs.py diff --git a/.gitignore b/.gitignore index c78e6696..9d6c8a7e 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ gym/wandb *.npz user/wandb_config.json *trajectories/ +*.png # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index cf9184b3..80d4b453 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -9,8 +9,6 @@ quat_from_euler_xyz, ) from isaacgym import gymtorch, gymapi -import matplotlib.pyplot as plt -import wandb from gym import LEGGED_GYM_ROOT_DIR from gym.envs.base.base_task import BaseTask @@ -41,12 +39,6 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): self.device = sim_device # todo CRIME: remove this from __init__ then self._parse_cfg(self.cfg) super().__init__(gym, sim, self.cfg, sim_params, sim_device, headless) - self.joint_log_table = wandb.Table( - columns=["step", "joint_name", "target_pos", "actual_pos", "torque"] - ) - self.com_table = wandb.Table( - columns=["step", "env_id", "COM_x", "COM_y", "COM_z"] - ) if not self.headless: self._set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat) @@ -60,33 +52,12 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def step(self): self._reset_buffers() self._pre_decimation_step() - self.com_traj = [] # * step physics and render each frame self._render() for _ in range(self.cfg.control.decimation): self._pre_compute_torques() self.torques = self._compute_torques() self._post_compute_torques() - - # logging for wandb - if self.common_step_counter % 100 == 0: # log every 100th step - env_id = 0 - joint_data = {} - for j in range(self.dof_pos.shape[1]): - joint_data[f"joint_{j}/target_pos"] = float( - self.dof_pos_target[env_id, j] - ) - joint_data[f"joint_{j}/actual_pos"] = float(self.dof_pos[env_id, j]) - joint_data[f"joint_{j}/torque"] = float(self.torques[env_id, j]) - - # Append current COM position - com_pos = self.root_states[env_id, :3].cpu().numpy() - self.com_traj.append(com_pos) - - if wandb.run is not None: - wandb.log(joint_data) - # wandb.log({"COM_point_cloud": wandb.Object3D(com_point_cloud)}) - self._step_physx_sim() self._post_physx_step() @@ -96,28 +67,6 @@ def step(self): env_ids = self.to_be_reset.nonzero(as_tuple=False).flatten() self._reset_idx(env_ids) - # If episode ended, plot COM trajectory in 3D and log to W&B - if ( - hasattr(self, "episode_done") - and self.episode_done - and len(self.com_traj) > 0 - ): - com_array = np.array(self.com_traj) - fig = plt.figure() - ax = fig.add_subplot(111, projection="3d") - ax.scatter( - com_array[:, 0], com_array[:, 1], com_array[:, 2], c="blue", marker="o" - ) - ax.set_xlabel("X") - ax.set_ylabel("Y") - ax.set_zlabel("Z") - ax.set_title("COM Trajectory") - wandb.log({"COM_trajectory_plot": wandb.Image(fig)}) - plt.close(fig) - - # Reset trajectory for next episode - self.com_traj = [] - def _pre_decimation_step(self): return None diff --git a/scripts/play.py b/scripts/play.py index 833e6641..9d7380ae 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -5,6 +5,8 @@ # torch needs to be imported after isaacgym imports in local source import torch +import numpy as np +import os def setup(args): @@ -29,7 +31,35 @@ def setup(args): return env, runner, train_cfg +def create_logging_dict(env, num_steps): + """ + Creates a dictionary of tensors to store joint data for each timestep. + """ + num_envs = env.num_envs + num_dofs = env.num_dof + + log_dict = { + "step": torch.zeros((num_steps,), dtype=torch.int32, device=env.device), + "target_pos": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + "actual_pos": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + "torque": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + } + + # Get joint names + joint_names = ( + env.dof_names + if hasattr(env, "dof_names") + else [f"joint_{i}" for i in range(num_dofs)] + ) + log_dict["joint_name"] = joint_names + + return log_dict + + def play(env, runner, train_cfg): + num_steps = int(env.max_episode_length) + log_data = create_logging_dict(env, num_steps) + # * set up recording if env.cfg.viewer.record: recorder = VisualizationRecorder( @@ -41,18 +71,41 @@ def play(env, runner, train_cfg): if COMMANDS_INTERFACE: # interface = GamepadInterface(env) interface = KeyboardInterface(env) - for i in range(10 * int(env.max_episode_length)): - if COMMANDS_INTERFACE: - interface.update(env) - if env.cfg.viewer.record: - recorder.update(i) - runner.set_actions( - runner.actor_cfg["actions"], - runner.get_inference_actions(), - runner.actor_cfg["disable_actions"], - ) - env.step() - env.check_exit() + + try: + for i in range(10 * int(env.max_episode_length)): + if COMMANDS_INTERFACE: + interface.update(env) + if env.cfg.viewer.record: + recorder.update(i) + + runner.set_actions( + runner.actor_cfg["actions"], + runner.get_inference_actions(), + runner.actor_cfg["disable_actions"], + ) + env.step() + + if i < num_steps: # avoid overflow if loop is longer than episode + log_data["step"][i] = i + log_data["target_pos"][:, i, :] = env.dof_pos_target + log_data["actual_pos"][:, i, :] = env.dof_pos + log_data["torque"][:, i, :] = env.torques + + env.check_exit() # user exit or viewer closed + + except KeyboardInterrupt: + print("\n[INFO] Interrupted by user, saving logs...") + except SystemExit: + print("\n[INFO] Viewer closed, saving logs...") + finally: + save_path = os.path.join(os.getcwd(), "joint_logs.npz") + log_data_cpu = { + k: (v.detach().cpu().numpy() if torch.is_tensor(v) else v) + for k, v in log_data.items() + } + np.savez_compressed(save_path, **log_data_cpu) + print(f"\nSaved joint log to {save_path}") if __name__ == "__main__": diff --git a/scripts/plot_logs.py b/scripts/plot_logs.py new file mode 100644 index 00000000..b4fd18a2 --- /dev/null +++ b/scripts/plot_logs.py @@ -0,0 +1,70 @@ +import numpy as np +import matplotlib.pyplot as plt +import os + +log_path = os.path.join(os.getcwd(), "joint_logs.npz") +if not os.path.exists(log_path): + raise FileNotFoundError(f"Could not find log file at {log_path}") + +data = np.load(log_path, allow_pickle=True) +steps = data["step"] +target_pos = data["target_pos"] +actual_pos = data["actual_pos"] +torques = data["torque"] +joint_names = data["joint_name"] + + +num_joints = len(joint_names) +num_steps = len(steps) +env_id = 0 # only visualize env 0 + +print(f"Loaded {num_joints} joints, {num_steps} steps from environment {env_id}.") + +fig, axs = plt.subplots(num_joints * 2, 1, figsize=(10, 4 * num_joints), sharex=True) +fig.suptitle("Joint Target/Actual Positions and Torques Over Time", fontsize=16) + +for j in range(num_joints): + pos_ax = axs[2 * j] + torque_ax = axs[2 * j + 1] + + # Find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[env_id, :, j]) > 1e-6)[0] + if len(nonzero_idx) > 0: + last_valid = nonzero_idx[-1] + 1 + else: + last_valid = len(steps) + + # --- Position subplot --- + pos_ax.plot( + steps[:last_valid], + target_pos[env_id, :last_valid, j], + label="Target Pos", + linestyle="--", + linewidth=1.5, + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[env_id, :last_valid, j], + label="Actual Pos", + linewidth=1.5, + ) + pos_ax.set_ylabel(f"{joint_names[j]} Pos (rad)") + pos_ax.legend(loc="upper right", fontsize=8) + pos_ax.grid(True, linestyle="--", alpha=0.4) + + # --- Torque subplot --- + torque_ax.plot( + steps[:last_valid], + torques[env_id, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_ylabel(f"{joint_names[j]} Torque (Nm)") + torque_ax.grid(True, linestyle="--", alpha=0.4) + +axs[-1].set_xlabel("Simulation Step") +plt.tight_layout(rect=[0, 0, 1, 0.96]) + +save_path = os.path.join(os.getcwd(), "joint_data_all_joints_separated.png") +plt.savefig(save_path, dpi=300) +print(f"Saved separated plot to {save_path}") From e1b3733bf59e6c20feb6120ba8b2913544ff0d9a Mon Sep 17 00:00:00 2001 From: sheim Date: Wed, 12 Nov 2025 10:47:37 -0500 Subject: [PATCH 13/35] WIP: partial urdf changes --- .../urdf/mini_cheetah_simple.urdf | 216 +++++++++--------- 1 file changed, 110 insertions(+), 106 deletions(-) diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 264cd721..0fedbd3d 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -1,6 +1,6 @@ - + - + @@ -53,15 +53,15 @@ - + - + - + @@ -71,15 +71,15 @@ - + - - + + - + @@ -90,15 +90,15 @@ - + - - - + + + - + @@ -109,15 +109,15 @@ - + - - + + - + @@ -128,15 +128,15 @@ - + - - + + - + @@ -148,52 +148,56 @@ - + - + - + - + + + + + - + - - - - + + + + - + - - + + - - - - - - + + + + + + - + @@ -204,15 +208,15 @@ - - + + - - - + + + - + @@ -223,15 +227,15 @@ - - - - - - + + + + + + - + @@ -243,15 +247,15 @@ - + - + - + - + @@ -261,15 +265,15 @@ - - + + - - - + + + - + @@ -280,15 +284,15 @@ - - + + - - - + + + - + @@ -299,15 +303,15 @@ - + - - - + + + - + @@ -318,15 +322,15 @@ - + - - + + - + @@ -338,15 +342,15 @@ - + - + - + - + @@ -356,15 +360,15 @@ - + - - - + + + - + @@ -375,15 +379,15 @@ - - - - - - + + + + + + - + @@ -394,15 +398,15 @@ - - - - - - + + + + + + - + @@ -413,15 +417,15 @@ - + - - + + - + From 9aae2835cbe628392dd60775e2e0f5599e100921 Mon Sep 17 00:00:00 2001 From: sheim Date: Wed, 12 Nov 2025 10:48:26 -0500 Subject: [PATCH 14/35] reformat mini_cheetah urdf file for readability --- .../urdf/mini_cheetah_simple.urdf | 1008 ++++++++++------- 1 file changed, 573 insertions(+), 435 deletions(-) diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 0fedbd3d..1f3abf1f 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -1,438 +1,576 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From f155bb41925b93ab748ef6670f7bd709bce3b64f Mon Sep 17 00:00:00 2001 From: sheim Date: Wed, 12 Nov 2025 11:56:53 -0500 Subject: [PATCH 15/35] fix left legs --- .../urdf/mini_cheetah_simple.urdf | 90 ++++++++++--------- 1 file changed, 48 insertions(+), 42 deletions(-) diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 1f3abf1f..a62ba48a 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -73,6 +73,12 @@ + + + + + + @@ -89,15 +95,15 @@ - + - + - + @@ -222,23 +228,23 @@ - + - + - + - + @@ -254,7 +260,7 @@ - + @@ -262,27 +268,27 @@ - + - + - + - + - + - + - + @@ -291,20 +297,20 @@ - - + + - + - + - + - + @@ -447,7 +453,7 @@ - + @@ -471,7 +477,7 @@ - + @@ -482,23 +488,23 @@ - + - + - + - + @@ -508,36 +514,36 @@ - + - + - + - + - + - + - + - + - + @@ -548,23 +554,23 @@ - + - - + + - + - + - + From 318d55ea82cfecce90933ef3a496592f72616408 Mon Sep 17 00:00:00 2001 From: sheim Date: Wed, 12 Nov 2025 13:44:01 -0500 Subject: [PATCH 16/35] mirror left and right --- .../urdf/mini_cheetah_simple.urdf | 136 +++++++++--------- 1 file changed, 65 insertions(+), 71 deletions(-) diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index a62ba48a..3e733ae9 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -9,7 +9,7 @@ - + @@ -18,7 +18,7 @@ - + @@ -54,7 +54,7 @@ - + @@ -64,7 +64,7 @@ - + @@ -73,47 +73,41 @@ - - - - - - - + - - + + - + - + - + - + - + - + - + - - + + - + @@ -129,57 +123,57 @@ - + - + - - + + - + - + - + - + - + - + - + - + - - + + - + - + - + - + @@ -322,7 +316,7 @@ - + @@ -331,7 +325,7 @@ - + @@ -341,40 +335,40 @@ - + - + - + - + - + - + - + - + - + @@ -384,36 +378,36 @@ - + - + - + - + - + - - + + - + - + - + - + - + @@ -422,25 +416,25 @@ - + - + - - + + - + - + - + From b247418b5010b4c8979b347e4439e96bf3a2118b Mon Sep 17 00:00:00 2001 From: sheim Date: Wed, 12 Nov 2025 14:01:40 -0500 Subject: [PATCH 17/35] remove inertial offsets along y for symmetry --- .../urdf/mini_cheetah_simple.urdf | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 3e733ae9..2b6de3a7 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -64,7 +64,7 @@ - + @@ -77,7 +77,7 @@ - + @@ -85,7 +85,7 @@ - + @@ -216,7 +216,7 @@ - + @@ -317,7 +317,7 @@ - + @@ -325,7 +325,7 @@ - + @@ -338,7 +338,7 @@ - + @@ -350,7 +350,7 @@ - + @@ -455,7 +455,7 @@ - + @@ -480,7 +480,7 @@ - + From 528538413ceca51e91a385a7a77d366c6e33ade4 Mon Sep 17 00:00:00 2001 From: sheim Date: Wed, 12 Nov 2025 16:58:54 -0500 Subject: [PATCH 18/35] rename urdf for symmetry ease, some minor tweaks --- gym/envs/mini_cheetah/mini_cheetah_config.py | 35 ++- .../urdf/mini_cheetah_simple.urdf | 208 +++++++++--------- 2 files changed, 133 insertions(+), 110 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 995962ed..f6a20586 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -10,7 +10,7 @@ class MiniCheetahCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 2**12 num_actuators = 1 + 4 * 5 - episode_length_s = 20 + episode_length_s = 10 class terrain(LeggedRobotCfg.terrain): mesh_type = "plane" @@ -31,7 +31,7 @@ class init_state(LeggedRobotCfg.init_state): reset_mode = "reset_to_range" # * default COM for basic initialization - pos = [0.0, 0.0, 1.3] # x,y,z [m] + pos = [0.0, 0.0, 1.4] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] @@ -97,9 +97,9 @@ class commands: resampling_time = 3.0 class ranges: - lin_vel_x = [-2.0, 3.0] # min max [m/s] + lin_vel_x = [-1.0, 8.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] - yaw_vel = 3 # max [rad/s] + yaw_vel = 6 # max [rad/s] height = [0.61, 1.30] # m class push_robots: @@ -151,7 +151,7 @@ class scaling(LeggedRobotCfg.scaling): 4 * [1.0, 1.0, 1.0, 1.0, 1.0] ) # i think these values were too small? dof_pos_obs = dof_pos - dof_pos_target = [0.2] + (4 * [0.1, 0.1, 0.1, 0.1, 0.1]) # target joint angles + dof_pos_target = [0.2] + (4 * [0.4, 0.4, 0.4, 0.2, 0.2]) # target joint angles tau_ff = [3600] + (4 * [3600, 3600, 400000, 400000, 5800]) # not being used commands = [3, 1, 3, 1] # add height as a command @@ -165,6 +165,8 @@ class actor(LeggedRobotRunnerCfg.actor): # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" obs = [ + "base_height", + "base_lin_vel", "base_ang_vel", "projected_gravity", "commands", @@ -225,7 +227,28 @@ class termination_weight: termination = 0.01 class algorithm(LeggedRobotRunnerCfg.algorithm): - pass + class algorithm: + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-4, 1e-2] + lr_ratio = 1.3 class runner(LeggedRobotRunnerCfg.runner): run_name = "" diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 2b6de3a7..4b05e72c 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -22,6 +22,13 @@ + + + + + + + @@ -44,24 +51,17 @@ - - - - - - - - + - + - + @@ -75,14 +75,14 @@ - + - - + + - + @@ -91,25 +91,25 @@ - + - + - + - - + + - + @@ -117,36 +117,36 @@ - + - + - + - - + + - + - + - + @@ -156,24 +156,24 @@ - + - - + + - + - + - + @@ -184,18 +184,18 @@ - + - + - + - + @@ -206,14 +206,14 @@ - + - - + + - + @@ -222,25 +222,25 @@ - + - + - + - - + + - + @@ -248,36 +248,36 @@ - + - + - + - - + + - + - + - + @@ -287,24 +287,24 @@ - + - - + + - + - + - + @@ -315,14 +315,14 @@ - + - + - + @@ -336,14 +336,14 @@ - + - - + + - + @@ -363,14 +363,14 @@ - + - - + + - + @@ -378,26 +378,26 @@ - + - + - + - - + + - + @@ -417,14 +417,14 @@ - + - - + + - + @@ -445,14 +445,14 @@ - + - + - + @@ -466,14 +466,14 @@ - + - - + + - + @@ -493,14 +493,14 @@ - + - - + + - + @@ -508,26 +508,26 @@ - + - + - + - - + + - + @@ -547,14 +547,14 @@ - + - - + + - + From 71985f677baa817148a52c669060515e04031e52 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Thu, 13 Nov 2025 00:49:52 -0800 Subject: [PATCH 19/35] plots separated by leg or joint --- scripts/plot_logs.py | 70 -------------- scripts/plot_logs_by_joint.py | 172 ++++++++++++++++++++++++++++++++++ scripts/plot_logs_by_leg.py | 97 +++++++++++++++++++ 3 files changed, 269 insertions(+), 70 deletions(-) delete mode 100644 scripts/plot_logs.py create mode 100644 scripts/plot_logs_by_joint.py create mode 100644 scripts/plot_logs_by_leg.py diff --git a/scripts/plot_logs.py b/scripts/plot_logs.py deleted file mode 100644 index b4fd18a2..00000000 --- a/scripts/plot_logs.py +++ /dev/null @@ -1,70 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -import os - -log_path = os.path.join(os.getcwd(), "joint_logs.npz") -if not os.path.exists(log_path): - raise FileNotFoundError(f"Could not find log file at {log_path}") - -data = np.load(log_path, allow_pickle=True) -steps = data["step"] -target_pos = data["target_pos"] -actual_pos = data["actual_pos"] -torques = data["torque"] -joint_names = data["joint_name"] - - -num_joints = len(joint_names) -num_steps = len(steps) -env_id = 0 # only visualize env 0 - -print(f"Loaded {num_joints} joints, {num_steps} steps from environment {env_id}.") - -fig, axs = plt.subplots(num_joints * 2, 1, figsize=(10, 4 * num_joints), sharex=True) -fig.suptitle("Joint Target/Actual Positions and Torques Over Time", fontsize=16) - -for j in range(num_joints): - pos_ax = axs[2 * j] - torque_ax = axs[2 * j + 1] - - # Find last index with nonzero torque (or position) - nonzero_idx = np.where(np.abs(actual_pos[env_id, :, j]) > 1e-6)[0] - if len(nonzero_idx) > 0: - last_valid = nonzero_idx[-1] + 1 - else: - last_valid = len(steps) - - # --- Position subplot --- - pos_ax.plot( - steps[:last_valid], - target_pos[env_id, :last_valid, j], - label="Target Pos", - linestyle="--", - linewidth=1.5, - ) - pos_ax.plot( - steps[:last_valid], - actual_pos[env_id, :last_valid, j], - label="Actual Pos", - linewidth=1.5, - ) - pos_ax.set_ylabel(f"{joint_names[j]} Pos (rad)") - pos_ax.legend(loc="upper right", fontsize=8) - pos_ax.grid(True, linestyle="--", alpha=0.4) - - # --- Torque subplot --- - torque_ax.plot( - steps[:last_valid], - torques[env_id, :last_valid, j], - color="tab:red", - linewidth=1.2, - ) - torque_ax.set_ylabel(f"{joint_names[j]} Torque (Nm)") - torque_ax.grid(True, linestyle="--", alpha=0.4) - -axs[-1].set_xlabel("Simulation Step") -plt.tight_layout(rect=[0, 0, 1, 0.96]) - -save_path = os.path.join(os.getcwd(), "joint_data_all_joints_separated.png") -plt.savefig(save_path, dpi=300) -print(f"Saved separated plot to {save_path}") diff --git a/scripts/plot_logs_by_joint.py b/scripts/plot_logs_by_joint.py new file mode 100644 index 00000000..7ed25e70 --- /dev/null +++ b/scripts/plot_logs_by_joint.py @@ -0,0 +1,172 @@ +import numpy as np +import matplotlib.pyplot as plt +import os +import re + +# config +LOG_FILE = "joint_logs.npz" +ENV_ID = 0 +SAVE_DIR = "plots_by_joint" + +# output folder +os.makedirs(SAVE_DIR, exist_ok=True) + +data = np.load(LOG_FILE, allow_pickle=True) +steps = data["step"] +target_pos = data["target_pos"] +actual_pos = data["actual_pos"] +torques = data["torque"] +joint_names = data["joint_name"] + +num_envs, num_steps, num_joints = actual_pos.shape +print(f"Loaded log for {num_joints} joints, {num_steps} steps.") + +pattern = re.compile(r".*(rh|lh|rf|lf)_(haa|hfe|kfe|pfe|pastern_to_foot)$") + +joint_map = {} +base_joints = {} + +for idx, name in enumerate(joint_names): + match = pattern.match(name) + if match: + leg, joint_type = match.groups() + leg, joint_type = leg.lower(), joint_type.lower() + joint_map.setdefault(joint_type, {})[leg] = idx + else: + # any joint name not matching the joint pattern will get its own plot + base_joints[name] = idx + + +def plot_joint_type(joint_type, leg_to_idx): + # 4 rows (LF/RF pos+torque, LH/RH pos+torque) × 2 columns + fig, axs = plt.subplots(4, 2, figsize=(12, 12), constrained_layout=True) + axs = np.array(axs) + + leg_pos = { + "lf": (0, 0), + "rf": (0, 1), + "lh": (2, 0), + "rh": (2, 1), + } + + for leg, (pos_row, col) in leg_pos.items(): + torque_row = pos_row + 1 + if leg not in leg_to_idx: + axs[pos_row, col].axis("off") + axs[torque_row, col].axis("off") + continue + + j = leg_to_idx[leg] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + if len(nonzero_idx) > 0: + last_valid = nonzero_idx[-1] + 1 + else: + last_valid = len(steps) + + # position plot + axs[pos_row, col].plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + linestyle="--", + linewidth=1.3, + label="Target Pos", + ) + axs[pos_row, col].plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + linewidth=1.3, + label="Actual Pos", + ) + axs[pos_row, col].set_title(f"{leg.upper()} - {joint_type.upper()} Position") + axs[pos_row, col].set_ylabel("Position (rad)") + axs[pos_row, col].grid(True, linestyle="--", alpha=0.4) + axs[pos_row, col].legend(fontsize=7) + + # torque plot + axs[torque_row, col].plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + axs[torque_row, col].set_title(f"{leg.upper()} - {joint_type.upper()} Torque") + axs[torque_row, col].set_ylabel("Torque (Nm)") + axs[torque_row, col].set_xlabel("Step") + axs[torque_row, col].grid(True, linestyle="--", alpha=0.4) + + plt.suptitle(f"{joint_type.upper()} Joints (All Legs)", fontsize=16) + save_path = os.path.join(SAVE_DIR, f"{joint_type}.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {save_path}") + + +# plot function for base +def plot_base_joints(base_joints): + fig, axs = plt.subplots( + len(base_joints) * 2, + 1, + figsize=(10, 4 * len(base_joints)), + constrained_layout=True, + ) + + if len(base_joints) == 1: + axs = np.array([axs[0], axs[1]]) + + for i, (name, j) in enumerate(base_joints.items()): + pos_ax = axs[2 * i] + torque_ax = axs[2 * i + 1] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + if len(nonzero_idx) > 0: + last_valid = nonzero_idx[-1] + 1 + else: + last_valid = len(steps) + + # position + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + linestyle="--", + linewidth=1.3, + label="Target Pos", + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + linewidth=1.3, + label="Actual Pos", + ) + pos_ax.set_title(f"{name} Position") + pos_ax.set_ylabel("Position (rad)") + pos_ax.legend(fontsize=7) + pos_ax.grid(True, linestyle="--", alpha=0.4) + + # torque + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_title(f"{name} Torque") + torque_ax.set_ylabel("Torque (Nm)") + torque_ax.set_xlabel("Step") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + plt.suptitle("Base Joints", fontsize=16) + save_path = os.path.join(SAVE_DIR, "base_joints.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {save_path}") + + +# generate plots +for joint_type, leg_to_idx in joint_map.items(): + plot_joint_type(joint_type, leg_to_idx) + +if base_joints: + plot_base_joints(base_joints) diff --git a/scripts/plot_logs_by_leg.py b/scripts/plot_logs_by_leg.py new file mode 100644 index 00000000..a12351cc --- /dev/null +++ b/scripts/plot_logs_by_leg.py @@ -0,0 +1,97 @@ +import numpy as np +import matplotlib.pyplot as plt +import os + +# config +LOG_FILE = "joint_logs.npz" +ENV_ID = 0 +SAVE_DIR = "plots_by_leg" + +# output folder +os.makedirs(SAVE_DIR, exist_ok=True) + +data = np.load(LOG_FILE, allow_pickle=True) +steps = data["step"] +target_pos = data["target_pos"] +actual_pos = data["actual_pos"] +torques = data["torque"] +joint_names = data["joint_name"] + +num_envs, num_steps, num_joints = actual_pos.shape +print(f"Loaded log for {num_joints} joints, {num_steps} steps.") + +# define grouping +groups = { + "base": [0], + "right_hind": list(range(1, 6)), + "left_hind": list(range(6, 11)), + "right_front": list(range(11, 16)), + "left_front": list(range(16, 21)), +} + + +def plot_joint_group(group_name, joint_indices): + num_joints_group = len(joint_indices) + fig, axs = plt.subplots( + nrows=num_joints_group * 2, + ncols=1, + figsize=(10, 3 * num_joints_group * 2), + constrained_layout=True, + ) + + if num_joints_group == 1: + axs = np.array([axs[0], axs[1]]) # ensure consistent 2D array shape + + for idx, j in enumerate(joint_indices): + pos_ax = axs[2 * idx] + torque_ax = axs[2 * idx + 1] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + if len(nonzero_idx) > 0: + last_valid = nonzero_idx[-1] + 1 + else: + last_valid = len(steps) + + # position + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + label="Target Pos", + linestyle="--", + linewidth=1.5, + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + label="Actual Pos", + linewidth=1.5, + ) + pos_ax.set_ylabel(f"{joint_names[j]} Pos (rad)") + pos_ax.legend(loc="upper right", fontsize=8) + pos_ax.grid(True, linestyle="--", alpha=0.4) + + # torque + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_ylabel(f"{joint_names[j]} Torque (Nm)") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + axs[-1].set_xlabel("Step") + plt.suptitle(f"{group_name.capitalize()} Joint Logs", fontsize=14) + save_path = os.path.join(SAVE_DIR, f"{group_name}.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {group_name}.png") + + +# generate plots +for group_name, joint_indices in groups.items(): + if max(joint_indices) < num_joints: + plot_joint_group(group_name, joint_indices) + else: + print(f"Skipping {group_name}: exceeds joint index range.") From 795cec0c37ecac08cb98762bf2d23e7f81772ec9 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Fri, 14 Nov 2025 00:13:57 -0800 Subject: [PATCH 20/35] log obs scaling --- .gitignore | 2 + scripts/play.py | 103 ++++++++++++++++++++++++++++++++---- scripts/plot_obs_scaling.py | 42 +++++++++++++++ 3 files changed, 136 insertions(+), 11 deletions(-) create mode 100644 scripts/plot_obs_scaling.py diff --git a/.gitignore b/.gitignore index 9d6c8a7e..2f256dfd 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,8 @@ gym/wandb user/wandb_config.json *trajectories/ *.png +obs_scaling_output/ +obs_scaling_comparison/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/scripts/play.py b/scripts/play.py index 9d7380ae..703437b1 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -1,4 +1,5 @@ from gym.envs import __init__ # noqa: F401 +from gym.envs.mini_cheetah.mini_cheetah_config import MiniCheetahCfg from gym.utils import get_args, task_registry from gym.utils import KeyboardInterface from gym.utils import VisualizationRecorder @@ -6,7 +7,6 @@ # torch needs to be imported after isaacgym imports in local source import torch import numpy as np -import os def setup(args): @@ -31,11 +31,55 @@ def setup(args): return env, runner, train_cfg -def create_logging_dict(env, num_steps): +def create_obs_logging_dict(env, obs_vars, num_steps, scaling_cfg=None): """ - Creates a dictionary of tensors to store joint data for each timestep. + Create a dictionary to log raw and scaled observation data. + + Returns: + obs_log: dict with keys: + {var}_raw -> [num_envs, num_steps, ...] + {var}_scaled -> [num_envs, num_steps, ...] """ num_envs = env.num_envs + obs_log = {} + + for var in obs_vars: + val = getattr(env, var) + shape = (num_envs, num_steps) + val.shape[1:] # preserve timestep array shape + + # allocate tensors + obs_log[f"{var}_raw"] = torch.zeros(shape, device=val.device) + obs_log[f"{var}_scaled"] = torch.zeros_like(obs_log[f"{var}_raw"]) + + return obs_log + + +def log_obs_step(env, obs_log, obs_vars, step_idx, scaling_cfg=None): + # log one timestep of observations + + for var in obs_vars: + val = getattr(env, var) # shape: [num_envs, N] or [num_envs, N, M] + + # raw + obs_log[f"{var}_raw"][:, step_idx, ...] = val + + # scaled + scale = None + if scaling_cfg is not None and hasattr(scaling_cfg, var): + scale = torch.tensor(getattr(scaling_cfg, var), device=val.device) + if scale is not None: + # scale to match val shape + while scale.ndim < val.ndim: + scale = scale.unsqueeze(0) + obs_log[f"{var}_scaled"][:, step_idx, ...] = val / scale + else: + # no scaling available -> just copy raw + obs_log[f"{var}_scaled"][:, step_idx, ...] = val + + +def create_logging_dict(env, num_steps): + # creates a dictionary of tensors to store joint data for each timestep. + num_envs = env.num_envs num_dofs = env.num_dof log_dict = { @@ -59,6 +103,23 @@ def create_logging_dict(env, num_steps): def play(env, runner, train_cfg): num_steps = int(env.max_episode_length) log_data = create_logging_dict(env, num_steps) + scaling = MiniCheetahCfg.scaling # NOTE: should change this to be an input + + obs_vars = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + + obs_log = create_obs_logging_dict(env, obs_vars, num_steps, scaling_cfg=scaling) + + # track actual number of simulation steps + actual_steps = 0 # * set up recording if env.cfg.viewer.record: @@ -86,11 +147,18 @@ def play(env, runner, train_cfg): ) env.step() - if i < num_steps: # avoid overflow if loop is longer than episode - log_data["step"][i] = i - log_data["target_pos"][:, i, :] = env.dof_pos_target - log_data["actual_pos"][:, i, :] = env.dof_pos - log_data["torque"][:, i, :] = env.torques + # log only actual simulation steps + if actual_steps < num_steps: + # log joints + log_data["step"][actual_steps] = actual_steps + log_data["target_pos"][:, actual_steps, :] = env.dof_pos_target + log_data["actual_pos"][:, actual_steps, :] = env.dof_pos + log_data["torque"][:, actual_steps, :] = env.torques + + # log observations + log_obs_step(env, obs_log, obs_vars, actual_steps, scaling_cfg=scaling) + + actual_steps += 1 env.check_exit() # user exit or viewer closed @@ -99,13 +167,26 @@ def play(env, runner, train_cfg): except SystemExit: print("\n[INFO] Viewer closed, saving logs...") finally: - save_path = os.path.join(os.getcwd(), "joint_logs.npz") + # slice to actual steps before saving log_data_cpu = { k: (v.detach().cpu().numpy() if torch.is_tensor(v) else v) for k, v in log_data.items() } - np.savez_compressed(save_path, **log_data_cpu) - print(f"\nSaved joint log to {save_path}") + for key in ["step", "target_pos", "actual_pos", "torque"]: + log_data_cpu[key] = log_data_cpu[key][:actual_steps] + + np.savez_compressed("joint_logs.npz", **log_data_cpu) + print(f"\nSaved joint log to joint_logs.npz ({actual_steps} steps)") + + obs_log_cpu = { + k: (v.detach().cpu().numpy() if torch.is_tensor(v) else v) + for k, v in obs_log.items() + } + for key in obs_log_cpu.keys(): + obs_log_cpu[key] = obs_log_cpu[key][:, :actual_steps, ...] + + np.savez_compressed("obs_logs.npz", **obs_log_cpu) + print(f"\nSaved obs log to obs_logs.npz ({actual_steps} steps)") if __name__ == "__main__": diff --git a/scripts/plot_obs_scaling.py b/scripts/plot_obs_scaling.py new file mode 100644 index 00000000..dd3f7192 --- /dev/null +++ b/scripts/plot_obs_scaling.py @@ -0,0 +1,42 @@ +import numpy as np +import os + +LOG_FILE = "obs_logs.npz" +ENV_ID = 0 +SAVE_DIR = "obs_scaling_output" +os.makedirs(SAVE_DIR, exist_ok=True) + +data = np.load(LOG_FILE, allow_pickle=True) + +# extract observation variables from keys +obs_vars = set() +for key in data.keys(): + print(key) + if key.endswith("_raw"): + obs_vars.add(key[:-4]) # remove '_raw' suffix + +print(f"Found observation variables: {obs_vars}") + + +def save_txt(var_name, values, save_dir): + """save multi-component array to a single .txt file""" + if values.ndim > 2: + # reshape to 2D: timesteps × components + values_2d = values.reshape(values.shape[0], -1) + else: + values_2d = values + save_path = os.path.join(save_dir, f"{var_name}.txt") + np.savetxt(save_path, values_2d, fmt="%.6f") + print(f"[saved txt] {save_path}") + + +for var in obs_vars: + raw_key = f"{var}_raw" + scaled_key = f"{var}_scaled" + + raw = data[raw_key][ENV_ID] + scaled = data[scaled_key][ENV_ID] if scaled_key in data else raw + + # save raw and scaled as separate .txt files + save_txt(f"{var}_raw", raw, SAVE_DIR) + save_txt(f"{var}_scaled", scaled, SAVE_DIR) From 5d65cc9685666f7e9cc00a7547354103679717a5 Mon Sep 17 00:00:00 2001 From: sheim Date: Mon, 17 Nov 2025 11:20:45 -0500 Subject: [PATCH 21/35] update scaling --- gym/envs/mini_cheetah/mini_cheetah_config.py | 53 +++++++++++-------- gym/envs/mini_cheetah/mini_cheetah_ref.py | 6 +-- .../urdf/mini_cheetah_simple.urdf | 8 +-- 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index f6a20586..71555f37 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -37,33 +37,42 @@ class init_state(LeggedRobotCfg.init_state): ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] # * initialization for random range setup + # these are the physical limits in the URDF as of 17 Nov 2025 + # dof_pos_range = { + # "haa": [-0.2, 0.2], + # "hfe": [-0.7, 0.6], + # "kfe": [-1.3, 0.1], + # "pfe": [-0.3, 2.2], + # "pastern_to_foot": [-0.3, 1.8], + # "base_joint": [-0.2, 0.2], + # } dof_pos_range = { "haa": [-0.2, 0.2], - "hfe": [-0.5, 0.5], - "kfe": [0.0, 0.0], - "pfe": [0.0, 0.0], - "pastern_to_foot": [0.0, 0.0], - "base_joint": [0.0, 0.0], + "hfe": [-0.7, 0.6], + "kfe": [-1.3, 0.1], + "pfe": [-0.3, 2.2], + "pastern_to_foot": [-0.3, 1.8], + "base_joint": [-0.2, 0.2], } dof_vel_range = { - "haa": [0.0, 0.0], - "hfe": [0.0, 0.0], - "kfe": [0.0, 0.0], - "pfe": [0.0, 0.0], - "pastern_to_foot": [0.0, 0.0], - "base_joint": [0.0, 0.0], + "haa": [-0.2, 0.2], + "hfe": [-0.2, 0.2], + "kfe": [-0.2, 0.2], + "pfe": [-0.2, 0.2], + "pastern_to_foot": [-0.2, 0.2], + "base_joint": [-0.2, 0.2], } root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y [1.3, 1.3], # z - [0.0, 0.0], # roll - [0.0, 0.0], # pitch - [0.0, 0.0], # yaw + [-0.2, 0.2], # roll + [-0.2, 0.2], # pitch + [-0.2, 0.2], # yaw ] root_vel_range = [ - [-0.5, 2.0], # x + [-0.5, 5.0], # x [0.0, 0.0], # y [-0.05, 0.05], # z [0.0, 0.0], # roll @@ -147,12 +156,10 @@ class scaling(LeggedRobotCfg.scaling): base_lin_vel = BASE_HEIGHT_REF dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) base_height = BASE_HEIGHT_REF - dof_pos = [0.2] + ( - 4 * [1.0, 1.0, 1.0, 1.0, 1.0] - ) # i think these values were too small? + dof_pos = [0.2] + (4 * [0.4, 1.3, 1.4, 2.5, 2.1]) dof_pos_obs = dof_pos - dof_pos_target = [0.2] + (4 * [0.4, 0.4, 0.4, 0.2, 0.2]) # target joint angles - tau_ff = [3600] + (4 * [3600, 3600, 400000, 400000, 5800]) # not being used + dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used commands = [3, 1, 3, 1] # add height as a command @@ -215,8 +222,8 @@ class weights: torques = 5.0e-6 dof_vel = 0.0 min_base_height = 1.5 - action_rate = 0.1 - action_rate2 = 0.01 + action_rate = 0.01 + action_rate2 = 0.001 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 @@ -253,6 +260,6 @@ class algorithm: class runner(LeggedRobotRunnerCfg.runner): run_name = "" experiment_name = "mini_cheetah" - max_iterations = 500 + max_iterations = 1000 algorithm_class_name = "PPO2" num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref.py b/gym/envs/mini_cheetah/mini_cheetah_ref.py index 57e1f0e7..a1a3ca13 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref.py @@ -59,8 +59,7 @@ def _switch(self): def _reward_swing_grf(self): """Reward non-zero grf during swing (0 to pi)""" in_contact = torch.gt( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), - 50.0, + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), 50.0 ) ph_off = torch.lt(self.phase, torch.pi) rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) @@ -69,8 +68,7 @@ def _reward_swing_grf(self): def _reward_stance_grf(self): """Reward non-zero grf during stance (pi to 2pi)""" in_contact = torch.gt( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), - 50.0, + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), 50.0 ) ph_off = torch.gt(self.phase, torch.pi) # should this be in swing? rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 4b05e72c..224bb330 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -80,7 +80,7 @@ - + @@ -211,7 +211,7 @@ - + @@ -341,7 +341,7 @@ - + @@ -471,7 +471,7 @@ - + From 4414757cc94fecd1d0d0af779d877a8034346cce Mon Sep 17 00:00:00 2001 From: sheim Date: Mon, 17 Nov 2025 14:38:34 -0500 Subject: [PATCH 22/35] split off horse on its own --- gym/envs/__init__.py | 4 + gym/envs/horse/horse.py | 78 ++ gym/envs/horse/horse_config.py | 262 +++++ gym/envs/mini_cheetah/mini_cheetah.py | 18 - gym/envs/mini_cheetah/mini_cheetah_config.py | 131 +-- .../mini_cheetah/mini_cheetah_osc_config.py | 2 +- resources/robots/horse/urdf/horse.urdf | 576 ++++++++++ .../urdf/mini_cheetah_simple.urdf | 1006 +++++++---------- 8 files changed, 1386 insertions(+), 691 deletions(-) create mode 100644 gym/envs/horse/horse.py create mode 100644 gym/envs/horse/horse_config.py create mode 100644 resources/robots/horse/urdf/horse.urdf diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index f9d8b788..c7293b71 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -15,6 +15,7 @@ "MiniCheetah": ".mini_cheetah.mini_cheetah", "MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref", "MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc", + "Horse": ".horse.horse", "MIT_Humanoid": ".mit_humanoid.mit_humanoid", "Anymal": ".anymal_c.anymal", "A1": ".a1.a1", @@ -28,6 +29,7 @@ "MiniCheetahRefCfg": ".mini_cheetah.mini_cheetah_ref_config", "MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACCfg": ".mini_cheetah.mini_cheetah_SAC_config", + "HorseCfg": ".horse.horse_config", "MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config", "A1Cfg": ".a1.a1_config", "AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -42,6 +44,7 @@ "MiniCheetahRefRunnerCfg": ".mini_cheetah.mini_cheetah_ref_config", "MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACRunnerCfg": ".mini_cheetah.mini_cheetah_SAC_config", + "HorseRunnerCfg": ".horse.horse_config", "MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config", "A1RunnerCfg": ".a1.a1_config", "AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -68,6 +71,7 @@ "MiniCheetahSACCfg", "MiniCheetahSACRunnerCfg" ], + "horse": ["Horse", "HorseCfg", "HorseRunnerCfg"], "humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"], "humanoid_running": [ "HumanoidRunning", diff --git a/gym/envs/horse/horse.py b/gym/envs/horse/horse.py new file mode 100644 index 00000000..bb2e545e --- /dev/null +++ b/gym/envs/horse/horse.py @@ -0,0 +1,78 @@ +import torch + +from isaacgym.torch_utils import torch_rand_float +from gym.envs.base.legged_robot import LeggedRobot + + +class Horse(LeggedRobot): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _reward_lin_vel_z(self): + """Penalize z axis base linear velocity with squared exp""" + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"]) + + def _reward_ang_vel_xy(self): + """Penalize xy axes base angular velocity""" + error = self._sqrdexp(self.base_ang_vel[:, :2] / self.scales["base_ang_vel"]) + return torch.sum(error, dim=1) + + def _reward_orientation(self): + """Penalize non-flat base orientation""" + error = ( + torch.square(self.projected_gravity[:, :2]) + / self.cfg.reward_settings.tracking_sigma + ) + return torch.sum(torch.exp(-error), dim=1) + + def _reward_min_base_height(self): + """Squared exponential saturating at base_height target""" + error = self.base_height - self.cfg.reward_settings.base_height_target + error /= self.scales["base_height"] + error = torch.clamp(error, max=0, min=None).flatten() + return self._sqrdexp(error) + + def _reward_tracking_lin_vel(self): + """Tracking of linear velocity commands (xy axes)""" + # just use lin_vel? + error = self.commands[:, :2] - self.base_lin_vel[:, :2] + # * scale by (1+|cmd|): if cmd=0, no scaling. + error *= 1.0 / (1.0 + torch.abs(self.commands[:, :2])) + error = torch.sum(torch.square(error), dim=1) + return torch.exp(-error / self.cfg.reward_settings.tracking_sigma) + + def _reward_tracking_ang_vel(self): + """Tracking of angular velocity commands (yaw)""" + ang_vel_error = torch.square( + (self.commands[:, 2] - self.base_ang_vel[:, 2]) / 5.0 + ) + return self._sqrdexp(ang_vel_error) + + def _reward_dof_vel(self): + """Penalize dof velocities""" + return torch.sum(self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1) + + def _reward_dof_near_home(self): + return torch.sum( + self._sqrdexp( + (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos_obs"] + ), + dim=1, + ) + + def _resample_commands(self, env_ids): + super()._resample_commands(env_ids) + + # resample height + height_range = self.command_ranges["height"] + self.commands[env_ids, 3] = torch_rand_float( + height_range[0], height_range[1], (len(env_ids), 1), device=self.device + ).squeeze(1) + + def _reward_tracking_height(self): + """Reward for base height.""" + # error between current and commanded height + error = self.base_height.flatten() - self.commands[:, 3].flatten() + error /= self.scales["base_height"] + + return self._sqrdexp(error) diff --git a/gym/envs/horse/horse_config.py b/gym/envs/horse/horse_config.py new file mode 100644 index 00000000..596c2c88 --- /dev/null +++ b/gym/envs/horse/horse_config.py @@ -0,0 +1,262 @@ +from gym.envs.base.legged_robot_config import ( + LeggedRobotCfg, + LeggedRobotRunnerCfg, +) + +BASE_HEIGHT_REF = 1.3 + + +class HorseCfg(LeggedRobotCfg): + class env(LeggedRobotCfg.env): + num_envs = 2**12 + num_actuators = 1 + 4 * 5 + episode_length_s = 10 + + class terrain(LeggedRobotCfg.terrain): + mesh_type = "plane" + + class init_state(LeggedRobotCfg.init_state): + default_joint_angles = { + "haa": 0.0, + "hfe": 0.0, + "kfe": 0.0, + "pfe": 0.0, + "pastern_to_foot": 0.0, + "base_joint": 0.0, + } + + # * reset setup chooses how the initial conditions are chosen. + # * "reset_to_basic" = a single position + # * "reset_to_range" = uniformly random from a range defined below + reset_mode = "reset_to_range" + + # * default COM for basic initialization + pos = [0.0, 0.0, 1.4] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + + # * initialization for random range setup + # these are the physical limits in the URDF as of 17 Nov 2025 + # dof_pos_range = { + # "haa": [-0.2, 0.2], + # "hfe": [-0.7, 0.6], + # "kfe": [-1.3, 0.1], + # "pfe": [-0.3, 2.2], + # "pastern_to_foot": [-0.3, 1.8], + # "base_joint": [-0.2, 0.2], + # } + dof_pos_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.7, 0.6], + "kfe": [-1.3, 0.1], + "pfe": [-0.3, 2.2], + "pastern_to_foot": [-0.3, 1.8], + "base_joint": [-0.2, 0.2], + } + dof_vel_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.2, 0.2], + "kfe": [-0.2, 0.2], + "pfe": [-0.2, 0.2], + "pastern_to_foot": [-0.2, 0.2], + "base_joint": [-0.2, 0.2], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [1.3, 1.3], # z + [-0.2, 0.2], # roll + [-0.2, 0.2], # pitch + [-0.2, 0.2], # yaw + ] + root_vel_range = [ + [-0.5, 5.0], # x + [0.0, 0.0], # y + [-0.05, 0.05], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(LeggedRobotCfg.control): + # * PD Drive parameters: + stiffness = { + "haa": 4000, + "hfe": 4000, + "kfe": 4000, + "pfe": 4000, + "pastern_to_foot": 4000, + "base_joint": 50, + } + damping = { + "haa": 250, + "hfe": 250, + "kfe": 250, + "pfe": 250, + "pastern_to_foot": 250, + "base_joint": 10, + } + ctrl_frequency = 500 # how often the PDF controller/action updates run + desired_sim_frequency = 1000 # how often the physics is calculated + + class commands: + # * time before command are changed[s] + resampling_time = 3.0 + + class ranges: + lin_vel_x = [-1.0, 8.0] # min max [m/s] + lin_vel_y = 1.0 # max [m/s] + yaw_vel = 6 # max [rad/s] + height = [0.61, 1.30] # m + + class push_robots: + toggle = False + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.5, 5.0] + randomize_base_mass = False + added_mass_range = [-1.0, 1.0] + + class asset(LeggedRobotCfg.asset): + file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" + foot_name = "foot" + penalize_contacts_on = ["shank"] + terminate_after_contacts_on = ["base"] + end_effector_names = ["foot"] + collapse_fixed_joints = False + self_collisions = 1 + flip_visual_attachments = False + disable_gravity = False + disable_motors = False + joint_damping = 0.3 + fix_base_link = False + rotor_inertia = [0.002268] + 4 * ( + [0.002268, 0.002268, 0.005484, 0.005484, 0.005484] + ) + + class reward_settings(LeggedRobotCfg.reward_settings): + soft_dof_pos_limit = 0.9 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 600.0 + base_height_target = BASE_HEIGHT_REF + tracking_sigma = 0.25 + + class scaling(LeggedRobotCfg.scaling): + base_ang_vel = 0.3 + base_lin_vel = BASE_HEIGHT_REF + dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + (4 * [0.4, 1.3, 1.4, 2.5, 2.1]) + dof_pos_obs = dof_pos + dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used + commands = [3, 1, 3, 1] # add height as a command + + +class HorseRunnerCfg(LeggedRobotRunnerCfg): + seed = -1 + runner_class_name = "OnPolicyRunner" + + class actor(LeggedRobotRunnerCfg.actor): + hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + normalize_obs = True + actions = ["dof_pos_target"] + add_noise = True + disable_actions = False + + class noise: + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 + dof_vel = 0.005 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 + + class critic(LeggedRobotRunnerCfg.critic): + hidden_dims = [128, 64] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + normalize_obs = True + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.01 + orientation = 1.0 + torques = 5.0e-6 + dof_vel = 0.0 + min_base_height = 1.5 + action_rate = 0.01 + action_rate2 = 0.001 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + tracking_height = 1.5 + + class termination_weight: + termination = 0.01 + + class algorithm(LeggedRobotRunnerCfg.algorithm): + class algorithm: + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-4, 1e-2] + lr_ratio = 1.3 + + class runner(LeggedRobotRunnerCfg.runner): + run_name = "" + experiment_name = "horse" + max_iterations = 1000 + algorithm_class_name = "PPO2" + num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah.py b/gym/envs/mini_cheetah/mini_cheetah.py index 06deb5a0..645e6093 100644 --- a/gym/envs/mini_cheetah/mini_cheetah.py +++ b/gym/envs/mini_cheetah/mini_cheetah.py @@ -1,6 +1,5 @@ import torch -from isaacgym.torch_utils import torch_rand_float from gym.envs.base.legged_robot import LeggedRobot @@ -59,20 +58,3 @@ def _reward_dof_near_home(self): ), dim=1, ) - - def _resample_commands(self, env_ids): - super()._resample_commands(env_ids) - - # resample height - height_range = self.command_ranges["height"] - self.commands[env_ids, 3] = torch_rand_float( - height_range[0], height_range[1], (len(env_ids), 1), device=self.device - ).squeeze(1) - - def _reward_tracking_height(self): - """Reward for base height.""" - # error between current and commanded height - error = self.base_height.flatten() - self.commands[:, 3].flatten() - error /= self.scales["base_height"] - - return self._sqrdexp(error) diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 71555f37..7608e080 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -3,13 +3,13 @@ LeggedRobotRunnerCfg, ) -BASE_HEIGHT_REF = 1.3 +BASE_HEIGHT_REF = 0.3 class MiniCheetahCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 2**12 - num_actuators = 1 + 4 * 5 + num_actuators = 12 episode_length_s = 10 class terrain(LeggedRobotCfg.terrain): @@ -18,11 +18,8 @@ class terrain(LeggedRobotCfg.terrain): class init_state(LeggedRobotCfg.init_state): default_joint_angles = { "haa": 0.0, - "hfe": 0.0, - "kfe": 0.0, - "pfe": 0.0, - "pastern_to_foot": 0.0, - "base_joint": 0.0, + "hfe": -0.785398, + "kfe": 1.596976, } # * reset setup chooses how the initial conditions are chosen. @@ -31,48 +28,28 @@ class init_state(LeggedRobotCfg.init_state): reset_mode = "reset_to_range" # * default COM for basic initialization - pos = [0.0, 0.0, 1.4] # x,y,z [m] + pos = [0.0, 0.0, 0.35] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] # * initialization for random range setup - # these are the physical limits in the URDF as of 17 Nov 2025 - # dof_pos_range = { - # "haa": [-0.2, 0.2], - # "hfe": [-0.7, 0.6], - # "kfe": [-1.3, 0.1], - # "pfe": [-0.3, 2.2], - # "pastern_to_foot": [-0.3, 1.8], - # "base_joint": [-0.2, 0.2], - # } dof_pos_range = { - "haa": [-0.2, 0.2], - "hfe": [-0.7, 0.6], - "kfe": [-1.3, 0.1], - "pfe": [-0.3, 2.2], - "pastern_to_foot": [-0.3, 1.8], - "base_joint": [-0.2, 0.2], + "haa": [-0.01, 0.01], + "hfe": [-0.785398, -0.785398], + "kfe": [1.596976, 1.596976], } - dof_vel_range = { - "haa": [-0.2, 0.2], - "hfe": [-0.2, 0.2], - "kfe": [-0.2, 0.2], - "pfe": [-0.2, 0.2], - "pastern_to_foot": [-0.2, 0.2], - "base_joint": [-0.2, 0.2], - } - + dof_vel_range = {"haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0]} root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [1.3, 1.3], # z - [-0.2, 0.2], # roll - [-0.2, 0.2], # pitch - [-0.2, 0.2], # yaw + [0.35, 0.35], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw ] root_vel_range = [ - [-0.5, 5.0], # x + [-0.5, 2.0], # x [0.0, 0.0], # y [-0.05, 0.05], # z [0.0, 0.0], # roll @@ -82,34 +59,19 @@ class init_state(LeggedRobotCfg.init_state): class control(LeggedRobotCfg.control): # * PD Drive parameters: - stiffness = { - "haa": 4000, - "hfe": 4000, - "kfe": 4000, - "pfe": 4000, - "pastern_to_foot": 4000, - "base_joint": 50, - } - damping = { - "haa": 250, - "hfe": 250, - "kfe": 250, - "pfe": 250, - "pastern_to_foot": 250, - "base_joint": 10, - } - ctrl_frequency = 500 # how often the PDF controller/action updates run - desired_sim_frequency = 1000 # how often the physics is calculated + stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0} + damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} + ctrl_frequency = 100 + desired_sim_frequency = 500 class commands: # * time before command are changed[s] resampling_time = 3.0 class ranges: - lin_vel_x = [-1.0, 8.0] # min max [m/s] + lin_vel_x = [-2.0, 3.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] - yaw_vel = 6 # max [rad/s] - height = [0.61, 1.30] # m + yaw_vel = 3 # max [rad/s] class push_robots: toggle = False @@ -119,7 +81,7 @@ class push_robots: class domain_rand: randomize_friction = True - friction_range = [0.5, 5.0] + friction_range = [0.5, 1.0] randomize_base_mass = False added_mass_range = [-1.0, 1.0] @@ -137,11 +99,8 @@ class asset(LeggedRobotCfg.asset): flip_visual_attachments = False disable_gravity = False disable_motors = False - joint_damping = 0.3 - fix_base_link = False - rotor_inertia = [0.002268] + 4 * ( - [0.002268, 0.002268, 0.005484, 0.005484, 0.005484] - ) + joint_damping = 0.1 + rotor_inertia = [0.002268, 0.002268, 0.005484] * 4 class reward_settings(LeggedRobotCfg.reward_settings): soft_dof_pos_limit = 0.9 @@ -154,13 +113,13 @@ class reward_settings(LeggedRobotCfg.reward_settings): class scaling(LeggedRobotCfg.scaling): base_ang_vel = 0.3 base_lin_vel = BASE_HEIGHT_REF - dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) - base_height = BASE_HEIGHT_REF - dof_pos = [0.2] + (4 * [0.4, 1.3, 1.4, 2.5, 2.1]) + dof_vel = 4 * [2.0, 2.0, 4.0] + base_height = 0.3 + dof_pos = 4 * [0.2, 0.3, 0.3] dof_pos_obs = dof_pos - dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles - tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used - commands = [3, 1, 3, 1] # add height as a command + dof_pos_target = 4 * [0.2, 0.3, 0.3] + tau_ff = 4 * [18, 18, 28] + commands = [3, 1, 3, 1] class MiniCheetahRunnerCfg(LeggedRobotRunnerCfg): @@ -172,8 +131,6 @@ class actor(LeggedRobotRunnerCfg.actor): # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" obs = [ - "base_height", - "base_lin_vel", "base_ang_vel", "projected_gravity", "commands", @@ -222,44 +179,22 @@ class weights: torques = 5.0e-6 dof_vel = 0.0 min_base_height = 1.5 - action_rate = 0.01 - action_rate2 = 0.001 + action_rate = 0.1 + action_rate2 = 0.01 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 dof_near_home = 0.0 - tracking_height = 1.5 class termination_weight: termination = 0.01 class algorithm(LeggedRobotRunnerCfg.algorithm): - class algorithm: - # both - gamma = 0.99 - lam = 0.95 - # shared - batch_size = 2**15 - max_gradient_steps = 24 - # new - storage_size = 2**17 # new - batch_size = 2**15 # new - - clip_param = 0.2 - learning_rate = 1.0e-3 - max_grad_norm = 1.0 - # Critic - use_clipped_value_loss = True - # Actor - entropy_coef = 0.01 - schedule = "adaptive" # could be adaptive, fixed - desired_kl = 0.01 - lr_range = [2e-4, 1e-2] - lr_ratio = 1.3 + pass class runner(LeggedRobotRunnerCfg.runner): run_name = "" experiment_name = "mini_cheetah" - max_iterations = 1000 + max_iterations = 500 algorithm_class_name = "PPO2" num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py index 2c32f06b..e8796147 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py @@ -163,7 +163,7 @@ class scaling(MiniCheetahCfg.scaling): dof_pos_target = 4 * [0.2, 0.3, 0.3] tau_ff = 4 * [18, 18, 28] # hip-abad, hip-pitch, knee # commands = [base_lin_vel, base_lin_vel, base_ang_vel] - commands = [3, 1, 3] # [base_lin_vel, base_lin_vel, base_ang_vel] + commands = [3, 1, 3, 1] # [base_lin_vel, base_lin_vel, base_ang_vel] class MiniCheetahOscRunnerCfg(MiniCheetahRunnerCfg): diff --git a/resources/robots/horse/urdf/horse.urdf b/resources/robots/horse/urdf/horse.urdf new file mode 100644 index 00000000..224bb330 --- /dev/null +++ b/resources/robots/horse/urdf/horse.urdf @@ -0,0 +1,576 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf index 224bb330..e5836117 100644 --- a/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf +++ b/resources/robots/mini_cheetah/urdf/mini_cheetah_simple.urdf @@ -1,576 +1,434 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From f6741623d8d4f9dda85b943085fa2cfc33589f8a Mon Sep 17 00:00:00 2001 From: sheim Date: Mon, 17 Nov 2025 15:14:01 -0500 Subject: [PATCH 23/35] fix play script --- gym/envs/horse/horse_config.py | 10 +++++----- scripts/play.py | 31 ++++++------------------------- 2 files changed, 11 insertions(+), 30 deletions(-) diff --git a/gym/envs/horse/horse_config.py b/gym/envs/horse/horse_config.py index 596c2c88..699aff5f 100644 --- a/gym/envs/horse/horse_config.py +++ b/gym/envs/horse/horse_config.py @@ -98,8 +98,8 @@ class control(LeggedRobotCfg.control): "pastern_to_foot": 250, "base_joint": 10, } - ctrl_frequency = 500 # how often the PDF controller/action updates run - desired_sim_frequency = 1000 # how often the physics is calculated + ctrl_frequency = 250 # how often the PDF controller/action updates run + desired_sim_frequency = 500 # how often the physics is calculated class commands: # * time before command are changed[s] @@ -216,11 +216,11 @@ class weights: lin_vel_z = 0.0 ang_vel_xy = 0.01 orientation = 1.0 - torques = 5.0e-6 + torques = 5.0e-7 dof_vel = 0.0 min_base_height = 1.5 - action_rate = 0.01 - action_rate2 = 0.001 + action_rate = 1e-4 + action_rate2 = 1e-5 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 diff --git a/scripts/play.py b/scripts/play.py index 703437b1..7b6f8d98 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -1,5 +1,4 @@ from gym.envs import __init__ # noqa: F401 -from gym.envs.mini_cheetah.mini_cheetah_config import MiniCheetahCfg from gym.utils import get_args, task_registry from gym.utils import KeyboardInterface from gym.utils import VisualizationRecorder @@ -31,7 +30,7 @@ def setup(args): return env, runner, train_cfg -def create_obs_logging_dict(env, obs_vars, num_steps, scaling_cfg=None): +def create_obs_logging_dict(env, obs_vars, num_steps): """ Create a dictionary to log raw and scaled observation data. @@ -54,27 +53,10 @@ def create_obs_logging_dict(env, obs_vars, num_steps, scaling_cfg=None): return obs_log -def log_obs_step(env, obs_log, obs_vars, step_idx, scaling_cfg=None): - # log one timestep of observations - +def log_obs_step(env, obs_log, obs_vars, step_idx): for var in obs_vars: - val = getattr(env, var) # shape: [num_envs, N] or [num_envs, N, M] - - # raw - obs_log[f"{var}_raw"][:, step_idx, ...] = val - - # scaled - scale = None - if scaling_cfg is not None and hasattr(scaling_cfg, var): - scale = torch.tensor(getattr(scaling_cfg, var), device=val.device) - if scale is not None: - # scale to match val shape - while scale.ndim < val.ndim: - scale = scale.unsqueeze(0) - obs_log[f"{var}_scaled"][:, step_idx, ...] = val / scale - else: - # no scaling available -> just copy raw - obs_log[f"{var}_scaled"][:, step_idx, ...] = val + obs_log[f"{var}_raw"][:, step_idx, ...] = getattr(env, var) + obs_log[f"{var}_scaled"][:, step_idx, ...] = env.get_state(var) def create_logging_dict(env, num_steps): @@ -103,7 +85,6 @@ def create_logging_dict(env, num_steps): def play(env, runner, train_cfg): num_steps = int(env.max_episode_length) log_data = create_logging_dict(env, num_steps) - scaling = MiniCheetahCfg.scaling # NOTE: should change this to be an input obs_vars = [ "base_height", @@ -116,7 +97,7 @@ def play(env, runner, train_cfg): "dof_pos_target", ] - obs_log = create_obs_logging_dict(env, obs_vars, num_steps, scaling_cfg=scaling) + obs_log = create_obs_logging_dict(env, obs_vars, num_steps) # track actual number of simulation steps actual_steps = 0 @@ -156,7 +137,7 @@ def play(env, runner, train_cfg): log_data["torque"][:, actual_steps, :] = env.torques # log observations - log_obs_step(env, obs_log, obs_vars, actual_steps, scaling_cfg=scaling) + log_obs_step(env, obs_log, obs_vars, actual_steps) actual_steps += 1 From c7eb06f0c4ac502decee1f1d376a883ba138383e Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Tue, 18 Nov 2025 00:51:38 -0800 Subject: [PATCH 24/35] add horse_osc --- gym/envs/__init__.py | 8 + gym/envs/horse/horse_osc.py | 526 +++++++++++++++++++++++++++++ gym/envs/horse/horse_osc_config.py | 318 +++++++++++++++++ scripts/plot_logs_by_joint.py | 110 ++++-- 4 files changed, 943 insertions(+), 19 deletions(-) create mode 100644 gym/envs/horse/horse_osc.py create mode 100644 gym/envs/horse/horse_osc_config.py diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index c7293b71..25b20da1 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -16,6 +16,7 @@ "MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref", "MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc", "Horse": ".horse.horse", + "HorseOsc": ".horse.horse_osc", "MIT_Humanoid": ".mit_humanoid.mit_humanoid", "Anymal": ".anymal_c.anymal", "A1": ".a1.a1", @@ -30,6 +31,7 @@ "MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACCfg": ".mini_cheetah.mini_cheetah_SAC_config", "HorseCfg": ".horse.horse_config", + "HorseOscCfg": ".horse.horse_osc_config", "MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config", "A1Cfg": ".a1.a1_config", "AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -45,6 +47,7 @@ "MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACRunnerCfg": ".mini_cheetah.mini_cheetah_SAC_config", "HorseRunnerCfg": ".horse.horse_config", + "HorseOscRunnerCfg": ".horse.horse_osc_config", "MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config", "A1RunnerCfg": ".a1.a1_config", "AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -72,6 +75,11 @@ "MiniCheetahSACRunnerCfg" ], "horse": ["Horse", "HorseCfg", "HorseRunnerCfg"], + "horse_osc": [ + "HorseOsc", + "HorseOscCfg", + "HorseOscRunnerCfg", + ], "humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"], "humanoid_running": [ "HumanoidRunning", diff --git a/gym/envs/horse/horse_osc.py b/gym/envs/horse/horse_osc.py new file mode 100644 index 00000000..ce5f8b08 --- /dev/null +++ b/gym/envs/horse/horse_osc.py @@ -0,0 +1,526 @@ +import torch +import numpy as np +from isaacgym.torch_utils import torch_rand_float + +from gym.envs.horse.horse import Horse + +from isaacgym import gymtorch + +HORSE_WEIGHT = 536.38 * 9.81 # Weight of horse in Newtons + + +class HorseOsc(Horse): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _init_buffers(self): + super()._init_buffers() + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) + self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) + + self.oscillators_vel = torch.zeros_like(self.oscillators) + self.grf = torch.zeros(self.num_envs, 4, device=self.device) + self.osc_omega = self.cfg.osc.omega * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_coupling = self.cfg.osc.coupling * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_offset = self.cfg.osc.offset * torch.ones( + self.num_envs, 1, device=self.device + ) + + def _reset_oscillators(self, env_ids): + if len(env_ids) == 0: + return + # * random + if self.cfg.osc.init_to == "random": + self.oscillators[env_ids] = torch_rand_float( + 0, + 2 * torch.pi, + shape=self.oscillators[env_ids].shape, + device=self.device, + ) + elif self.cfg.osc.init_to == "standing": + self.oscillators[env_ids] = 3 * torch.pi / 2 + elif self.cfg.osc.init_to == "trot": + self.oscillators[env_ids] = torch.tensor( + [0.0, torch.pi, torch.pi, 0.0], device=self.device + ) + elif self.cfg.osc.init_to == "pace": + self.oscillators[env_ids] = torch.tensor( + [0.0, torch.pi, 0.0, torch.pi], device=self.device + ) + if self.cfg.osc.init_w_offset: + self.oscillators[env_ids, :] += ( + torch.rand_like(self.oscillators[env_ids, 0]).unsqueeze(1) + * 2 + * torch.pi + ) + elif self.cfg.osc.init_to == "pronk": + self.oscillators[env_ids, :] *= 0.0 + elif self.cfg.osc.init_to == "bound": + self.oscillators[env_ids, :] = torch.tensor( + [torch.pi, torch.pi, 0.0, 0.0], device=self.device + ) + else: + raise NotImplementedError + + if self.cfg.osc.init_w_offset: + self.oscillators[env_ids, :] += ( + torch.rand_like(self.oscillators[env_ids, 0]).unsqueeze(1) + * 2 + * torch.pi + ) + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + + def _reset_system(self, env_ids): + if len(env_ids) == 0: + return + self._reset_oscillators(env_ids) + + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + # * keep some robots in the same starting state as they ended + timed_out_subset = (self.timed_out & ~self.terminated) * ( + torch.rand(self.num_envs, device=self.device) + < self.cfg.init_state.timeout_reset_ratio + ) + + env_ids = (self.terminated | timed_out_subset).nonzero().flatten() + if len(env_ids) == 0: + return + super()._reset_system(env_ids) + + def _pre_decimation_step(self): + super()._pre_decimation_step() + # self.grf = self._compute_grf() + if not self.cfg.osc.randomize_osc_params: + self.compute_osc_slope() + + def compute_osc_slope(self): + cmd_x = torch.abs(self.commands[:, 0:1]) - self.cfg.osc.stop_threshold + stop = cmd_x < 0 + + self.osc_offset = stop * self.cfg.osc.offset + self.osc_omega = ( + stop * self.cfg.osc.omega_stop + + torch.randn_like(self.osc_omega) * self.cfg.osc.omega_var + ) + self.osc_coupling = ( + stop * self.cfg.osc.coupling_stop + + torch.randn_like(self.osc_coupling) * self.cfg.osc.coupling_var + ) + + self.osc_omega += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.omega_slope + self.cfg.osc.omega_step, + min=0.0, + max=self.cfg.osc.omega_max, + ) + self.osc_coupling += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.coupling_slope + self.cfg.osc.coupling_step, + min=0.0, + max=self.cfg.osc.coupling_max, + ) + + self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) + self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) + + def _process_rigid_body_props(self, props, env_id): + if env_id == 0: + # * init buffers for the domain rand changes + self.mass = torch.zeros(self.num_envs, 1, device=self.device) + self.com = torch.zeros(self.num_envs, 3, device=self.device) + + # * randomize mass + if self.cfg.domain_rand.randomize_base_mass: + lower = self.cfg.domain_rand.lower_mass_offset + upper = self.cfg.domain_rand.upper_mass_offset + # self.mass_ + props[0].mass += np.random.uniform(lower, upper) + self.mass[env_id] = props[0].mass + # * randomize com position + lower = self.cfg.domain_rand.lower_z_offset + upper = self.cfg.domain_rand.upper_z_offset + props[0].com.z += np.random.uniform(lower, upper) + self.com[env_id, 2] = props[0].com.z + + lower = self.cfg.domain_rand.lower_x_offset + upper = self.cfg.domain_rand.upper_x_offset + props[0].com.x += np.random.uniform(lower, upper) + self.com[env_id, 0] = props[0].com.x + return props + + def _post_decimation_step(self): + """Update all states that are not handled in PhysX""" + super()._post_decimation_step() + self.grf = self._compute_grf() + # self._step_oscillators() + + def _post_physx_step(self): + super()._post_physx_step() + self._step_oscillators(self.dt / self.cfg.control.decimation) + return None + + def _step_oscillators(self, dt=None): + if dt is None: + dt = self.dt + + local_feedback = self.osc_coupling * ( + torch.cos(self.oscillators) + self.osc_offset + ) + grf = self._compute_grf() + self.oscillators_vel = self.osc_omega - grf * local_feedback + # self.oscillators_vel *= torch_rand_float(0.9, + # 1.1, + # self.oscillators_vel.shape, + # self.device) + self.oscillators_vel += ( + torch.randn(self.oscillators_vel.shape, device=self.device) + * self.cfg.osc.process_noise_std + ) + + self.oscillators_vel *= 2 * torch.pi + self.oscillators += ( + self.oscillators_vel * dt + ) # torch.clamp(self.oscillators_vel * dt, min=0) + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + def _resample_commands(self, env_ids): + """Randommly select commands of some environments + + Args: + env_ids (List[int]): Environments ids for which new commands are needed + """ + if len(env_ids) == 0: + return + super()._resample_commands(env_ids) + possible_commands = torch.tensor( + self.command_ranges["lin_vel_x"], device=self.device + ) + self.commands[env_ids, 0:1] = possible_commands[ + torch.randint( + 0, len(possible_commands), (len(env_ids), 1), device=self.device + ) + ] + # add some gaussian noise to the commands + self.commands[env_ids, 0:1] += ( + torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var + ) + + # possible_commands = torch.tensor(self.command_ranges["lin_vel_y"], + # device=self.device) + # self.commands[env_ids, 1:2] = possible_commands[torch.randint( + # 0, len(possible_commands), (len(env_ids), 1), + # device=self.device)] + # possible_commands = torch.tensor(self.command_ranges["yaw_vel"], + # device=self.device) + # self.commands[env_ids, 0:1] = possible_commands[torch.randint( + # 0, len(possible_commands), (len(env_ids), 1), + # device=self.device)] + + if 0 in self.cfg.commands.ranges.lin_vel_x: + # * with 20% chance, reset to 0 commands except for forward + self.commands[env_ids, 1:] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 20% chance, reset to 0 commands except for rotation + self.commands[env_ids, :2] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 10% chance, reset to 0 + self.commands[env_ids, :] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.9 + ).unsqueeze(1) + + if self.cfg.osc.randomize_osc_params: + self._resample_osc_params(env_ids) + + def _resample_osc_params(self, env_ids): + if len(env_ids) > 0: + self.osc_omega[env_ids, 0] = torch_rand_float( + self.cfg.osc.omega_range[0], + self.cfg.osc.omega_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_coupling[env_ids, 0] = torch_rand_float( + self.cfg.osc.coupling_range[0], + self.cfg.osc.coupling_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_offset[env_ids, 0] = torch_rand_float( + self.cfg.osc.offset_range[0], + self.cfg.osc.offset_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + + def perturb_base_velocity(self, velocity_delta, env_ids=None): + if env_ids is None: + env_ids = [range(self.num_envs)] + self.root_states[env_ids, 7:10] += velocity_delta + self.gym.set_actor_root_state_tensor( + self.sim, gymtorch.unwrap_tensor(self.root_states) + ) + + def _compute_grf(self, grf_norm=True): + grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) + if grf_norm: + return torch.clamp_max(grf / HORSE_WEIGHT, 1.0) + else: + return grf + + def _switch(self): + c_vel = torch.linalg.norm(self.commands, dim=1) + return torch.exp( + -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) + / self.cfg.reward_settings.switch_scale + ) + + def _reward_lin_vel_z(self): + """Penalize z axis base linear velocity with squared exp""" + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"][0]) + + def _reward_ang_vel_xy(self): + """Penalize xy axes base angular velocity""" + error = self._sqrdexp(self.base_ang_vel[:, :2] / self.scales["base_ang_vel"][0]) + return torch.sum(error, dim=1) + + # modified this method so joint index list uses horse 5 DOF legs, not 3 DOF + def _reward_cursorial(self): + """ + Encourage legs under body, joints stay neutral, avoid inverted/folded poses + Penalize key joints drifting from home. For a horse with 5 joints per leg, + we penalize (haa, hfe, kfe) for each of the 4 legs. + Distal joint (pfe, pastern_to_foot) naturally flex a lot, shock absorbers + """ + # indices of major joints per leg (haa,hfe,kfe) + major_joint_idx = torch.tensor( + [1, 2, 3, 6, 7, 8, 11, 12, 13, 16, 17, 18], device=self.device + ) + + major_joints = self.dof_pos[:, major_joint_idx] + scaled = major_joints / self.scales["dof_pos"][0] # normalize + + return -torch.mean(torch.square(scaled), dim=1) + + def _reward_swing_grf(self): + # Reward non-zero grf during swing (0 to pi) + rew = self.get_swing_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool) + return -torch.sum(rew, dim=1) + + def _reward_stance_grf(self): + # Reward non-zero grf during stance (pi to 2pi) + rew = self.get_stance_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool) + return torch.sum(rew, dim=1) + + def get_swing_grf(self, osc_bool=False, contact_bool=False): + if osc_bool: + phase = torch.lt(self.oscillators, torch.pi).int() + else: + phase = torch.maximum( + torch.zeros_like(self.oscillators), torch.sin(self.oscillators) + ) + if contact_bool: + return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold) + else: + return phase * self._compute_grf() + + def get_stance_grf(self, osc_bool=False, contact_bool=False): + if osc_bool: + phase = torch.gt(self.oscillators, torch.pi).int() + else: + phase = torch.maximum( + torch.zeros_like(self.oscillators), -torch.sin(self.oscillators) + ) + if contact_bool: + return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold) + else: + return phase * self._compute_grf() + + def _reward_coupled_grf(self): + """ + Multiply rewards for stance/swing grf, discount when undesirable + behavior (grf during swing, no grf during stance) + """ + swing_rew = self.get_swing_grf() + stance_rew = self.get_stance_grf() + combined_rew = self._sqrdexp(swing_rew * 2) + stance_rew + prod = torch.prod(torch.clip(combined_rew, 0, 1), dim=1) + return prod - torch.ones_like(prod) + + # this is removed in mini_cheetah_osc + def _reward_orientation(self): + """Penalize non-flat base orientation""" + error = ( + torch.square(self.projected_gravity[:, :2]) + / self.cfg.reward_settings.tracking_sigma + ) + return torch.sum(torch.exp(-error), dim=1) + + # this is removed in mini_cheetah_osc + def _reward_min_base_height(self): + """Squared exponential saturating at base_height target""" + error = self.base_height - self.cfg.reward_settings.base_height_target + error /= self.scales["base_height"] + error = torch.clamp(error, max=0, min=None).flatten() + return self._sqrdexp(error) + + # this is removed in mini_cheetah_osc + def _reward_tracking_lin_vel(self): + """Tracking of linear velocity commands (xy axes)""" + # just use lin_vel? + error = self.commands[:, :2] - self.base_lin_vel[:, :2] + # * scale by (1+|cmd|): if cmd=0, no scaling. + error *= 1.0 / (1.0 + torch.abs(self.commands[:, :2])) + error = torch.sum(torch.square(error), dim=1) + return torch.exp(-error / self.cfg.reward_settings.tracking_sigma) + + # this is removed in mini_cheetah_osc + def _reward_tracking_ang_vel(self): + """Tracking of angular velocity commands (yaw)""" + ang_vel_error = torch.square( + (self.commands[:, 2] - self.base_ang_vel[:, 2]) / 5.0 + ) + return self._sqrdexp(ang_vel_error) + + def _reward_dof_vel(self): + """Penalize dof velocities""" + return super()._reward_dof_vel() * self._switch() + + def _reward_dof_near_home(self): + return super()._reward_dof_near_home() * self._switch() + + def _reward_stand_still(self): + """Penalize any movement when at zero commands.""" + # normalize angles so we care about being within ~5 degrees + rew_pos = torch.mean( + self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1 + ) + rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) + rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) + rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) + return (rew_vel + rew_pos - rew_base_vel) * self._switch() + + def _reward_standing_torques(self): + """Penalize torques at zero commands""" + return super()._reward_torques() * self._switch() + + # * gait similarity scores + def angle_difference(self, theta1, theta2): + diff = torch.abs(theta1 - theta2) % (2 * torch.pi) + return torch.min(diff, 2 * torch.pi - diff) + + def _reward_trot(self): + # ! diagonal difference, front right and hind left + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3]) + similarity = self._sqrdexp(angle, torch.pi) + # ! diagonal difference, front left and hind right + angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2]) + similarity *= self._sqrdexp(angle, torch.pi) + # ! diagonal difference, front left and hind right + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) + similarity *= self._sqrdexp(angle - torch.pi, torch.pi) + # ! diagonal difference, front left and hind right + angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3]) + similarity *= self._sqrdexp(angle - torch.pi, torch.pi) + return similarity + + def _reward_pronk(self): + # ! diagonal difference, front right and hind left + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3]) + similarity = self._sqrdexp(angle, torch.pi) + # ! diagonal difference, front left and hind right + angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2]) + similarity *= self._sqrdexp(angle, torch.pi) + # ! difference, front right and front left + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) + similarity *= self._sqrdexp(angle, torch.pi) + # ! difference front right and hind right + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2]) + similarity *= self._sqrdexp(angle, torch.pi) + return similarity + + def _reward_pace(self): + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2]) + similarity = self._sqrdexp(angle, torch.pi) + # ! difference front left and hind left + angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3]) + similarity *= self._sqrdexp(angle, torch.pi) + # ! difference front left and hind left + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) + similarity *= self._sqrdexp(angle - torch.pi, torch.pi) + # ! difference front left and hind left + angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3]) + similarity *= self._sqrdexp(angle - torch.pi, torch.pi) + + return similarity + + def _reward_any_symm_gait(self): + rew_trot = self._reward_trot() + rew_pace = self._reward_pace() + rew_bound = self._reward_bound() + return torch.max(torch.max(rew_trot, rew_pace), rew_bound) + + def _reward_enc_pace(self): + return self._reward_pace() + + def _reward_bound(self): + # ! difference, front right and front left + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) + similarity = self._sqrdexp(angle, torch.pi) + # ! difference hind right and hind left + angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3]) + similarity *= self._sqrdexp(angle, torch.pi) + # ! difference right side + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2]) + similarity *= self._sqrdexp(angle - torch.pi, torch.pi) + # ! difference right side + angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3]) + similarity *= self._sqrdexp(angle - torch.pi, torch.pi) + return similarity + + def _reward_asymettric(self): + # ! hind legs + angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3]) + similarity = 1 - self._sqrdexp(angle, torch.pi) + similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) + # ! difference, left legs + angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3]) + similarity *= 1 - self._sqrdexp(angle, torch.pi) + similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) + # ! difference right legs + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2]) + similarity *= 1 - self._sqrdexp(angle, torch.pi) + similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) + # ! front legs + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) + similarity *= 1 - self._sqrdexp(angle, torch.pi) + similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) + # ! diagonal FR + angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3]) + similarity *= 1 - self._sqrdexp(angle, torch.pi) + similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) + # ! diagonal FL + angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2]) + similarity *= 1 - self._sqrdexp(angle, torch.pi) + similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) + return similarity + + def _reward_tracking_height(self): + """Reward for base height.""" + # error between current and commanded height + error = self.base_height.flatten() - self.commands[:, 3].flatten() + error /= self.scales["base_height"] + + return self._sqrdexp(error) diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py new file mode 100644 index 00000000..fcd92067 --- /dev/null +++ b/gym/envs/horse/horse_osc_config.py @@ -0,0 +1,318 @@ +from gym.envs.horse.horse_config import ( + HorseCfg, + HorseRunnerCfg, +) + +BASE_HEIGHT_REF = 1.3 + + +class HorseOscCfg(HorseCfg): + class viewer: + ref_env = 0 + pos = [-2.0, 3, 2] # [m] + lookat = [0.0, 1.0, 0.5] # [m] + + class env(HorseCfg.env): + num_envs = 2**12 + num_actuators = 1 + 4 * 5 + episode_length_s = 10 + env_spacing = 3.0 + + class terrain(HorseCfg.terrain): + mesh_type = "plane" + # mesh_type = 'trimesh' # none, plane, heightfield or trimesh + + class init_state(HorseCfg.init_state): + timeout_reset_ratio = 0.75 + reset_mode = "reset_to_range" + # * default COM for basic initialization + pos = [0.0, 0.0, 1.4] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + default_joint_angles = { + "haa": 0.0, + "hfe": 0.0, + "kfe": 0.0, + "pfe": 0.0, + "pastern_to_foot": 0.0, + "base_joint": 0.0, + } + + # * initialization for random range setup + # these are the physical limits in the URDF as of 17 Nov 2025 + # dof_pos_range = { + # "haa": [-0.2, 0.2], + # "hfe": [-0.7, 0.6], + # "kfe": [-1.3, 0.1], + # "pfe": [-0.3, 2.2], + # "pastern_to_foot": [-0.3, 1.8], + # "base_joint": [-0.2, 0.2], + # } + dof_pos_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.7, 0.6], + "kfe": [-1.3, 0.1], + "pfe": [-0.3, 2.2], + "pastern_to_foot": [-0.3, 1.8], + "base_joint": [-0.2, 0.2], + } + dof_vel_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.2, 0.2], + "kfe": [-0.2, 0.2], + "pfe": [-0.2, 0.2], + "pastern_to_foot": [-0.2, 0.2], + "base_joint": [-0.2, 0.2], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [1.3, 1.3], # z + [-0.2, 0.2], # roll + [-0.2, 0.2], # pitch + [-0.2, 0.2], # yaw + ] + root_vel_range = [ + [-0.5, 5.0], # x + [0.0, 0.0], # y + [-0.05, 0.05], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(HorseCfg.control): + # * PD Drive parameters: + stiffness = { + "haa": 4000, + "hfe": 4000, + "kfe": 4000, + "pfe": 4000, + "pastern_to_foot": 4000, + "base_joint": 50, + } + damping = { + "haa": 250, + "hfe": 250, + "kfe": 250, + "pfe": 250, + "pastern_to_foot": 250, + "base_joint": 10, + } + ctrl_frequency = 250 # how often the PDF controller/action updates run + desired_sim_frequency = 500 # how often the physics is calculated + + class osc: # <-------------------most likely needs tuning + process_noise_std = 0.25 + grf_threshold = 0.1 # 20. # Normalized to body weight + # oscillator parameters + omega = 3 # gets overwritten + coupling = 1 # gets overwritten + osc_bool = False # not used in paper + grf_bool = False # not used in paper + randomize_osc_params = False + omega_range = [1.0, 4.0] # [0.0, 10.] + coupling_range = [0.0, 1.0] + offset_range = [0.0, 0.0] + stop_threshold = 0.5 + omega_stop = 1.0 + omega_step = 2.0 + omega_slope = 1.0 + omega_max = 4.0 + omega_var = 0.25 + # coupling_step = 0. + # coupling_stop = 0. + coupling_stop = 4.0 + coupling_step = 1.0 + coupling_slope = 0.0 + coupling_max = 1.0 + offset = 1.0 + coupling_var = 0.25 + + init_to = "random" + init_w_offset = True + + class commands: + resampling_time = 3.0 # * time before command are changed[s] + var = 1.0 + + class ranges: + lin_vel_x = [-1.0, 0.0, 1.0, 3.0] # min max [m/s] + lin_vel_y = 1.0 # max [m/s] + yaw_vel = 6 # max [rad/s] + height = [0.61, 1.30] # m + + class push_robots: + toggle = False + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.4, 1.0] + randomize_base_mass = False + lower_mass_offset = -0.5 # kg + upper_mass_offset = 2.0 + lower_z_offset = 0.0 # m + upper_z_offset = 0.2 + lower_x_offset = 0.0 + upper_x_offset = 0.0 + + class asset(HorseCfg.asset): + shank_length_diff = 0 # Units in cm + file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" + foot_name = "foot" + penalize_contacts_on = ["thigh", "shank", "pastern"] + terminate_after_contacts_on = ["base"] + collapse_fixed_joints = False + fix_base_link = False + self_collisions = 1 + flip_visual_attachments = False + disable_gravity = False + disable_motors = False + joint_damping = 0.3 + + class reward_settings(HorseCfg.reward_settings): + soft_dof_pos_limit = 0.9 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 600.0 + base_height_target = BASE_HEIGHT_REF + 0.03 + tracking_sigma = 0.25 + switch_scale = 0.5 + + class scaling(HorseCfg.scaling): + base_ang_vel = [0.3, 0.3, 0.1] + base_lin_vel = BASE_HEIGHT_REF + dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + (4 * [0.4, 1.3, 1.4, 2.5, 2.1]) + dof_pos_obs = dof_pos + dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used + commands = [3, 1, 3, 1] # add height as a command + + +class HorseOscRunnerCfg(HorseRunnerCfg): + seed = -1 + runner_class_name = "OnPolicyRunner" + + class actor(HorseRunnerCfg.actor): + hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + # obs = [ + # "base_height", + # "base_lin_vel", + # "base_ang_vel", + # "projected_gravity", + # "commands", + # "dof_pos_obs", + # "dof_vel", + # "dof_pos_target", + # ] + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "dof_pos_target", + # "osc_omega", + # "osc_coupling" + # "oscillators_vel", + # "grf", + ] + normalize_obs = True + actions = ["dof_pos_target"] + add_noise = True + disable_actions = False + + class noise: + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 + dof_vel = 0.005 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 + + class critic(HorseRunnerCfg.critic): + hidden_dims = [128, 64] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "oscillators_vel", + "dof_pos_target", + ] + normalize_obs = True + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.0 + orientation = 1.0 + torques = 5.0e-6 + dof_vel = 0.0 + min_base_height = 1.0 + collision = 0 + action_rate = 0.1 # -0.01 + action_rate2 = 0.01 # -0.001 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + swing_grf = 1.0 + stance_grf = 1.0 + swing_velocity = 0.0 + stance_velocity = 0.0 + coupled_grf = 0.0 # 8. + enc_pace = 0.0 + cursorial = 0.25 + standing_torques = 0.0 # 1.e-5 + + class termination_weight: + termination = 15.0 / 100.0 + + class algorithm: + # training params + value_loss_coef = 1.0 + use_clipped_value_loss = True + clip_param = 0.2 + entropy_coef = 0.01 + num_learning_epochs = 4 + # mini batch size = num_envs*nsteps/nminibatches + num_mini_batches = 8 + max_gradient_steps = 32 + learning_rate = 1.0e-4 + schedule = "adaptive" # can be adaptive, fixed + discount_horizon = 1.0 + GAE_bootstrap_horizon = 2.0 + desired_kl = 0.01 + max_grad_norm = 1.0 + lr_range = [1e-5, 5e-3] + lr_ratio = 1.5 + + class runner(HorseRunnerCfg.runner): + run_name = "" + experiment_name = "horse_osc" + max_iterations = 500 + algorithm_class_name = "PPO2" + num_steps_per_env = 32 diff --git a/scripts/plot_logs_by_joint.py b/scripts/plot_logs_by_joint.py index 7ed25e70..f20bd9c7 100644 --- a/scripts/plot_logs_by_joint.py +++ b/scripts/plot_logs_by_joint.py @@ -11,6 +11,28 @@ # output folder os.makedirs(SAVE_DIR, exist_ok=True) +# joint limits +JOINT_LIMITS = { + r".*haa": [-0.2, 0.2], + r".*f_hfe": [-0.7, 0.6], + r".*h_hfe": [-0.7, 1.5], + r".*f_kfe": [-1.3, 0.1], + r".*h_kfe": [-0.2, 0.8], + r".*f_pfe": [-0.3, 2.2], + r".*h_pfe": [-0.3, 2.5], + r".*f_pastern_to_foot": [-0.3, 1.8], + r".*h_pastern_to_foot": [-0.3, 1.8], + r".*base_joint": [-0.2, 0.2], +} + + +def find_joint_limits(joint_name: str): + for pattern, limits in JOINT_LIMITS.items(): + if re.match(pattern, joint_name): + return limits + return None + + data = np.load(LOG_FILE, allow_pickle=True) steps = data["step"] target_pos = data["target_pos"] @@ -60,41 +82,67 @@ def plot_joint_type(joint_type, leg_to_idx): # find last index with nonzero torque (or position) nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] - if len(nonzero_idx) > 0: - last_valid = nonzero_idx[-1] + 1 - else: - last_valid = len(steps) + last_valid = nonzero_idx[-1] + 1 if len(nonzero_idx) > 0 else len(steps) # position plot - axs[pos_row, col].plot( + pos_ax = axs[pos_row, col] + pos_ax.plot( steps[:last_valid], target_pos[ENV_ID, :last_valid, j], linestyle="--", linewidth=1.3, label="Target Pos", ) - axs[pos_row, col].plot( + pos_ax.plot( steps[:last_valid], actual_pos[ENV_ID, :last_valid, j], linewidth=1.3, label="Actual Pos", ) - axs[pos_row, col].set_title(f"{leg.upper()} - {joint_type.upper()} Position") - axs[pos_row, col].set_ylabel("Position (rad)") - axs[pos_row, col].grid(True, linestyle="--", alpha=0.4) - axs[pos_row, col].legend(fontsize=7) + + # add joint limit lines + limits = find_joint_limits(joint_names[j]) + if limits is not None: + lo, hi = limits + pos_ax.axhline( + lo, + color="blue", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Lower Limit ({lo:.2f})", + ) + pos_ax.axhline( + hi, + color="red", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Upper Limit ({hi:.2f})", + ) + + ymin, ymax = pos_ax.get_ylim() + ymin = min(ymin, lo - 0.05) + ymax = max(ymax, hi + 0.05) + pos_ax.set_ylim([ymin, ymax]) + + pos_ax.set_title(f"{leg.upper()} - {joint_type.upper()} Position") + pos_ax.set_ylabel("Position (rad)") + pos_ax.grid(True, linestyle="--", alpha=0.4) + pos_ax.legend(fontsize=7) # torque plot - axs[torque_row, col].plot( + torque_ax = axs[torque_row, col] + torque_ax.plot( steps[:last_valid], torques[ENV_ID, :last_valid, j], color="tab:red", linewidth=1.2, ) - axs[torque_row, col].set_title(f"{leg.upper()} - {joint_type.upper()} Torque") - axs[torque_row, col].set_ylabel("Torque (Nm)") - axs[torque_row, col].set_xlabel("Step") - axs[torque_row, col].grid(True, linestyle="--", alpha=0.4) + torque_ax.set_title(f"{leg.upper()} - {joint_type.upper()} Torque") + torque_ax.set_ylabel("Torque (Nm)") + torque_ax.set_xlabel("Step") + torque_ax.grid(True, linestyle="--", alpha=0.4) plt.suptitle(f"{joint_type.upper()} Joints (All Legs)", fontsize=16) save_path = os.path.join(SAVE_DIR, f"{joint_type}.png") @@ -121,10 +169,7 @@ def plot_base_joints(base_joints): # find last index with nonzero torque (or position) nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] - if len(nonzero_idx) > 0: - last_valid = nonzero_idx[-1] + 1 - else: - last_valid = len(steps) + last_valid = nonzero_idx[-1] + 1 if len(nonzero_idx) > 0 else len(steps) # position pos_ax.plot( @@ -140,6 +185,33 @@ def plot_base_joints(base_joints): linewidth=1.3, label="Actual Pos", ) + + # add joint limits + limits = find_joint_limits(name) + if limits is not None: + lo, hi = limits + pos_ax.axhline( + lo, + color="blue", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Lower Limit ({lo:.2f})", + ) + pos_ax.axhline( + hi, + color="red", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Upper Limit ({hi:.2f})", + ) + + ymin, ymax = pos_ax.get_ylim() + ymin = min(ymin, lo - 0.05) + ymax = max(ymax, hi + 0.05) + pos_ax.set_ylim([ymin, ymax]) + pos_ax.set_title(f"{name} Position") pos_ax.set_ylabel("Position (rad)") pos_ax.legend(fontsize=7) From dc210124ed548a5281e0f6e4ea67b0f43585897f Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Wed, 19 Nov 2025 00:19:40 -0800 Subject: [PATCH 25/35] error stats logging --- .gitignore | 3 +- scripts/analyze_joint_scaling.py | 84 ++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 scripts/analyze_joint_scaling.py diff --git a/.gitignore b/.gitignore index 2f256dfd..2a17069a 100644 --- a/.gitignore +++ b/.gitignore @@ -17,8 +17,7 @@ gym/wandb user/wandb_config.json *trajectories/ *.png -obs_scaling_output/ -obs_scaling_comparison/ +scaling_analysis/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/scripts/analyze_joint_scaling.py b/scripts/analyze_joint_scaling.py new file mode 100644 index 00000000..1de90d03 --- /dev/null +++ b/scripts/analyze_joint_scaling.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import numpy as np +import os +import matplotlib.pyplot as plt +import csv + +OUT_DIR = "scaling_analysis" +os.makedirs(OUT_DIR, exist_ok=True) + +LOG_FILE = "joint_logs.npz" + +# load data +d = np.load(LOG_FILE, allow_pickle=True) + +actual = d["actual_pos"] # [envs, steps, joints] +target = d["target_pos"] +joint_names = d["joint_name"] + +num_envs, total_steps, num_joints = actual.shape + +# episode length +if "step" in d: + step_arr = d["step"] + valid = np.where(step_arr != 0)[0] + actual_steps = valid[-1] + 1 if len(valid) > 0 else 1 +else: + tmp = target[0] + nz = np.where(np.any(tmp != 0, axis=1))[0] + actual_steps = nz[-1] + 1 + +print(f"Trimming timestep dimension from {total_steps} -> {actual_steps}") + +# trim arrays +actual = actual[:, :actual_steps, :] +target = target[:, :actual_steps, :] + +# raw error +error = target - actual # [envs, steps, joints] +abs_error = np.abs(error) + +# shape: (num_envs * steps, num_joints) +flat_err = abs_error.reshape(-1, num_joints) + +# mean absolute error for each joint +mae = np.mean(flat_err, axis=0) + +# standard deviation of absolute error for each joint +std_err = np.std(flat_err, axis=0) + +# csv stats +csv_path = os.path.join(OUT_DIR, "error_summary.csv") +with open(csv_path, "w", newline="") as f: + w = csv.writer(f) + w.writerow(["joint_idx", "joint_name", "MAE_all_envs", "STD_all_envs"]) + for j in range(num_joints): + w.writerow( + [ + j, + joint_names[j], + float(mae[j]), + float(std_err[j]), + ] + ) + +print(f"Saved raw error summary (all envs): {csv_path}") + +# histogram with all env +all_errors = error.reshape(-1, num_joints) + +for j in range(num_joints): + name = joint_names[j] + plt.figure(figsize=(10, 4)) + plt.hist(all_errors[:, j], bins=80) + plt.title(f"{name} – Raw Error Histogram (All Envs Combined)") + plt.xlabel("target_pos - actual_pos") + plt.ylabel("count") + plt.grid(True, alpha=0.3) + + out = os.path.join(OUT_DIR, f"{j:02d}_{name}_hist.png") + plt.savefig(out, dpi=200) + plt.close() + +print(f"Saved histograms in: {OUT_DIR}") From 2d37c6936ce07f645c740dbf209983cc6a293ecb Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Wed, 19 Nov 2025 00:20:01 -0800 Subject: [PATCH 26/35] wandb sweep config --- gym/envs/horse/horse_osc.py | 10 ++++ .../sweep_configs/sweep_horse_osc_config.json | 58 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 scripts/sweep_configs/sweep_horse_osc_config.json diff --git a/gym/envs/horse/horse_osc.py b/gym/envs/horse/horse_osc.py index ce5f8b08..d8f6483a 100644 --- a/gym/envs/horse/horse_osc.py +++ b/gym/envs/horse/horse_osc.py @@ -524,3 +524,13 @@ def _reward_tracking_height(self): error /= self.scales["base_height"] return self._sqrdexp(error) + + # def _compute_torques(self): + # torques = ( + # self.p_gains * (self.dof_pos_target + self.default_dof_pos - self.dof_pos) + # + self.d_gains * (self.dof_vel_target - self.dof_vel) + # + self.tau_ff + # ) + + # # disable torque limits + # return torques diff --git a/scripts/sweep_configs/sweep_horse_osc_config.json b/scripts/sweep_configs/sweep_horse_osc_config.json new file mode 100644 index 00000000..df27bc5a --- /dev/null +++ b/scripts/sweep_configs/sweep_horse_osc_config.json @@ -0,0 +1,58 @@ +{ + "method": "random", + "metric": { + "name": "rewards/total_rewards", + "goal": "maximize" + }, + "run_cap": 50, + "parameters": { + "train_cfg.runner.max_iterations": { + "values": [500] + }, + "env_cfg.scaling.dof_pos": { + "values": [ + [0.2, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1], + [0.24, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52], + [0.288, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024], + [0.3456, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288], + [0.41472, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536], + [0.497664, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472] + ] + }, + + "env_cfg.scaling.dof_pos_target": { + "values": [ + [0.4, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2], + [0.48, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04], + [0.576, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048], + [0.6912, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576], + [0.82944, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008], + [0.995328, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976] + ] + }, + + "env_cfg.control.stiffness": { + "values": [ + {"haa": 4000, "hfe": 4000, "kfe": 4000, "pfe": 4000, "pastern_to_foot": 4000, "base_joint": 4000}, + {"haa": 3000, "hfe": 3000, "kfe": 3000, "pfe": 3000, "pastern_to_foot": 3000, "base_joint": 3000}, + {"haa": 2000, "hfe": 2000, "kfe": 2000, "pfe": 2000, "pastern_to_foot": 2000, "base_joint": 2000}, + {"haa": 1000, "hfe": 1000, "kfe": 1000, "pfe": 1000, "pastern_to_foot": 1000, "base_joint": 1000}, + {"haa": 500, "hfe": 500, "kfe": 500, "pfe": 500, "pastern_to_foot": 500, "base_joint": 500}, + {"haa": 250, "hfe": 250, "kfe": 250, "pfe": 250, "pastern_to_foot": 250, "base_joint": 250}, + {"haa": 100, "hfe": 100, "kfe": 100, "pfe": 100, "pastern_to_foot": 100, "base_joint": 100}, + {"haa": 50, "hfe": 50, "kfe": 50, "pfe": 50, "pastern_to_foot": 50, "base_joint": 50} + ] + }, + + "env_cfg.control.damping": { + "values": [ + {"haa": 400, "hfe": 400, "kfe": 400, "pfe": 400, "pastern_to_foot": 400, "base_joint": 400}, + {"haa": 300, "hfe": 300, "kfe": 300, "pfe": 300, "pastern_to_foot": 300, "base_joint": 300}, + {"haa": 250, "hfe": 250, "kfe": 250, "pfe": 250, "pastern_to_foot": 250, "base_joint": 250}, + {"haa": 200, "hfe": 200, "kfe": 200, "pfe": 200, "pastern_to_foot": 200, "base_joint": 200}, + {"haa": 100, "hfe": 100, "kfe": 100, "pfe": 100, "pastern_to_foot": 100, "base_joint": 100}, + {"haa": 50, "hfe": 50, "kfe": 50, "pfe": 50, "pastern_to_foot": 50, "base_joint": 50} + ] + } + } +} From c0411ff3ec11a636fe16b1de2cd67114c41f1278 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Wed, 19 Nov 2025 00:42:06 -0800 Subject: [PATCH 27/35] cleanup --- scripts/plot_obs_scaling.py | 42 ------------------------------------- 1 file changed, 42 deletions(-) delete mode 100644 scripts/plot_obs_scaling.py diff --git a/scripts/plot_obs_scaling.py b/scripts/plot_obs_scaling.py deleted file mode 100644 index dd3f7192..00000000 --- a/scripts/plot_obs_scaling.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np -import os - -LOG_FILE = "obs_logs.npz" -ENV_ID = 0 -SAVE_DIR = "obs_scaling_output" -os.makedirs(SAVE_DIR, exist_ok=True) - -data = np.load(LOG_FILE, allow_pickle=True) - -# extract observation variables from keys -obs_vars = set() -for key in data.keys(): - print(key) - if key.endswith("_raw"): - obs_vars.add(key[:-4]) # remove '_raw' suffix - -print(f"Found observation variables: {obs_vars}") - - -def save_txt(var_name, values, save_dir): - """save multi-component array to a single .txt file""" - if values.ndim > 2: - # reshape to 2D: timesteps × components - values_2d = values.reshape(values.shape[0], -1) - else: - values_2d = values - save_path = os.path.join(save_dir, f"{var_name}.txt") - np.savetxt(save_path, values_2d, fmt="%.6f") - print(f"[saved txt] {save_path}") - - -for var in obs_vars: - raw_key = f"{var}_raw" - scaled_key = f"{var}_scaled" - - raw = data[raw_key][ENV_ID] - scaled = data[scaled_key][ENV_ID] if scaled_key in data else raw - - # save raw and scaled as separate .txt files - save_txt(f"{var}_raw", raw, SAVE_DIR) - save_txt(f"{var}_scaled", scaled, SAVE_DIR) From ca10c7e59950a5d2c3ecfba48b2baab01c4dad3a Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Thu, 20 Nov 2025 22:39:08 -0800 Subject: [PATCH 28/35] more cleanup --- gym/envs/base/legged_robot.py | 25 +++ gym/envs/horse/horse_osc.py | 228 +--------------------- gym/envs/horse/horse_osc_config.py | 4 - gym/envs/mini_cheetah/mini_cheetah_osc.py | 45 +---- 4 files changed, 30 insertions(+), 272 deletions(-) diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index 04ae9405..2344e05f 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -987,6 +987,31 @@ def _sqrdexp(self, x, scale=1.0): -torch.square(x / scale) / self.cfg.reward_settings.tracking_sigma ) + def _process_rigid_body_props(self, props, env_id): + if env_id == 0: + # * init buffers for the domain rand changes + self.mass = torch.zeros(self.num_envs, 1, device=self.device) + self.com = torch.zeros(self.num_envs, 3, device=self.device) + + # * randomize mass + if self.cfg.domain_rand.randomize_base_mass: + lower = self.cfg.domain_rand.lower_mass_offset + upper = self.cfg.domain_rand.upper_mass_offset + # self.mass_ + props[0].mass += np.random.uniform(lower, upper) + self.mass[env_id] = props[0].mass + # * randomize com position + lower = self.cfg.domain_rand.lower_z_offset + upper = self.cfg.domain_rand.upper_z_offset + props[0].com.z += np.random.uniform(lower, upper) + self.com[env_id, 2] = props[0].com.z + + lower = self.cfg.domain_rand.lower_x_offset + upper = self.cfg.domain_rand.upper_x_offset + props[0].com.x += np.random.uniform(lower, upper) + self.com[env_id, 0] = props[0].com.x + return props + # ------------ reward functions---------------- def _reward_lin_vel_z(self): diff --git a/gym/envs/horse/horse_osc.py b/gym/envs/horse/horse_osc.py index d8f6483a..3a8c73fb 100644 --- a/gym/envs/horse/horse_osc.py +++ b/gym/envs/horse/horse_osc.py @@ -1,5 +1,4 @@ import torch -import numpy as np from isaacgym.torch_utils import torch_rand_float from gym.envs.horse.horse import Horse @@ -43,35 +42,6 @@ def _reset_oscillators(self, env_ids): ) elif self.cfg.osc.init_to == "standing": self.oscillators[env_ids] = 3 * torch.pi / 2 - elif self.cfg.osc.init_to == "trot": - self.oscillators[env_ids] = torch.tensor( - [0.0, torch.pi, torch.pi, 0.0], device=self.device - ) - elif self.cfg.osc.init_to == "pace": - self.oscillators[env_ids] = torch.tensor( - [0.0, torch.pi, 0.0, torch.pi], device=self.device - ) - if self.cfg.osc.init_w_offset: - self.oscillators[env_ids, :] += ( - torch.rand_like(self.oscillators[env_ids, 0]).unsqueeze(1) - * 2 - * torch.pi - ) - elif self.cfg.osc.init_to == "pronk": - self.oscillators[env_ids, :] *= 0.0 - elif self.cfg.osc.init_to == "bound": - self.oscillators[env_ids, :] = torch.tensor( - [torch.pi, torch.pi, 0.0, 0.0], device=self.device - ) - else: - raise NotImplementedError - - if self.cfg.osc.init_w_offset: - self.oscillators[env_ids, :] += ( - torch.rand_like(self.oscillators[env_ids, 0]).unsqueeze(1) - * 2 - * torch.pi - ) self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) def _reset_system(self, env_ids): @@ -97,22 +67,15 @@ def _reset_system(self, env_ids): def _pre_decimation_step(self): super()._pre_decimation_step() # self.grf = self._compute_grf() - if not self.cfg.osc.randomize_osc_params: - self.compute_osc_slope() + self.compute_osc_slope() def compute_osc_slope(self): cmd_x = torch.abs(self.commands[:, 0:1]) - self.cfg.osc.stop_threshold stop = cmd_x < 0 self.osc_offset = stop * self.cfg.osc.offset - self.osc_omega = ( - stop * self.cfg.osc.omega_stop - + torch.randn_like(self.osc_omega) * self.cfg.osc.omega_var - ) - self.osc_coupling = ( - stop * self.cfg.osc.coupling_stop - + torch.randn_like(self.osc_coupling) * self.cfg.osc.coupling_var - ) + self.osc_omega = stop * self.cfg.osc.omega_stop + self.osc_coupling = stop * self.cfg.osc.coupling_stop self.osc_omega += (~stop) * torch.clamp( cmd_x * self.cfg.osc.omega_slope + self.cfg.osc.omega_step, @@ -128,31 +91,6 @@ def compute_osc_slope(self): self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) - def _process_rigid_body_props(self, props, env_id): - if env_id == 0: - # * init buffers for the domain rand changes - self.mass = torch.zeros(self.num_envs, 1, device=self.device) - self.com = torch.zeros(self.num_envs, 3, device=self.device) - - # * randomize mass - if self.cfg.domain_rand.randomize_base_mass: - lower = self.cfg.domain_rand.lower_mass_offset - upper = self.cfg.domain_rand.upper_mass_offset - # self.mass_ - props[0].mass += np.random.uniform(lower, upper) - self.mass[env_id] = props[0].mass - # * randomize com position - lower = self.cfg.domain_rand.lower_z_offset - upper = self.cfg.domain_rand.upper_z_offset - props[0].com.z += np.random.uniform(lower, upper) - self.com[env_id, 2] = props[0].com.z - - lower = self.cfg.domain_rand.lower_x_offset - upper = self.cfg.domain_rand.upper_x_offset - props[0].com.x += np.random.uniform(lower, upper) - self.com[env_id, 0] = props[0].com.x - return props - def _post_decimation_step(self): """Update all states that are not handled in PhysX""" super()._post_decimation_step() @@ -173,19 +111,13 @@ def _step_oscillators(self, dt=None): ) grf = self._compute_grf() self.oscillators_vel = self.osc_omega - grf * local_feedback - # self.oscillators_vel *= torch_rand_float(0.9, - # 1.1, - # self.oscillators_vel.shape, - # self.device) self.oscillators_vel += ( torch.randn(self.oscillators_vel.shape, device=self.device) * self.cfg.osc.process_noise_std ) self.oscillators_vel *= 2 * torch.pi - self.oscillators += ( - self.oscillators_vel * dt - ) # torch.clamp(self.oscillators_vel * dt, min=0) + self.oscillators += self.oscillators_vel * dt self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) self.oscillator_obs = torch.cat( (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 @@ -213,17 +145,6 @@ def _resample_commands(self, env_ids): torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var ) - # possible_commands = torch.tensor(self.command_ranges["lin_vel_y"], - # device=self.device) - # self.commands[env_ids, 1:2] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - # possible_commands = torch.tensor(self.command_ranges["yaw_vel"], - # device=self.device) - # self.commands[env_ids, 0:1] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - if 0 in self.cfg.commands.ranges.lin_vel_x: # * with 20% chance, reset to 0 commands except for forward self.commands[env_ids, 1:] *= ( @@ -359,41 +280,6 @@ def _reward_coupled_grf(self): prod = torch.prod(torch.clip(combined_rew, 0, 1), dim=1) return prod - torch.ones_like(prod) - # this is removed in mini_cheetah_osc - def _reward_orientation(self): - """Penalize non-flat base orientation""" - error = ( - torch.square(self.projected_gravity[:, :2]) - / self.cfg.reward_settings.tracking_sigma - ) - return torch.sum(torch.exp(-error), dim=1) - - # this is removed in mini_cheetah_osc - def _reward_min_base_height(self): - """Squared exponential saturating at base_height target""" - error = self.base_height - self.cfg.reward_settings.base_height_target - error /= self.scales["base_height"] - error = torch.clamp(error, max=0, min=None).flatten() - return self._sqrdexp(error) - - # this is removed in mini_cheetah_osc - def _reward_tracking_lin_vel(self): - """Tracking of linear velocity commands (xy axes)""" - # just use lin_vel? - error = self.commands[:, :2] - self.base_lin_vel[:, :2] - # * scale by (1+|cmd|): if cmd=0, no scaling. - error *= 1.0 / (1.0 + torch.abs(self.commands[:, :2])) - error = torch.sum(torch.square(error), dim=1) - return torch.exp(-error / self.cfg.reward_settings.tracking_sigma) - - # this is removed in mini_cheetah_osc - def _reward_tracking_ang_vel(self): - """Tracking of angular velocity commands (yaw)""" - ang_vel_error = torch.square( - (self.commands[:, 2] - self.base_ang_vel[:, 2]) / 5.0 - ) - return self._sqrdexp(ang_vel_error) - def _reward_dof_vel(self): """Penalize dof velocities""" return super()._reward_dof_vel() * self._switch() @@ -421,102 +307,6 @@ def angle_difference(self, theta1, theta2): diff = torch.abs(theta1 - theta2) % (2 * torch.pi) return torch.min(diff, 2 * torch.pi - diff) - def _reward_trot(self): - # ! diagonal difference, front right and hind left - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3]) - similarity = self._sqrdexp(angle, torch.pi) - # ! diagonal difference, front left and hind right - angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2]) - similarity *= self._sqrdexp(angle, torch.pi) - # ! diagonal difference, front left and hind right - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) - similarity *= self._sqrdexp(angle - torch.pi, torch.pi) - # ! diagonal difference, front left and hind right - angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3]) - similarity *= self._sqrdexp(angle - torch.pi, torch.pi) - return similarity - - def _reward_pronk(self): - # ! diagonal difference, front right and hind left - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3]) - similarity = self._sqrdexp(angle, torch.pi) - # ! diagonal difference, front left and hind right - angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2]) - similarity *= self._sqrdexp(angle, torch.pi) - # ! difference, front right and front left - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) - similarity *= self._sqrdexp(angle, torch.pi) - # ! difference front right and hind right - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2]) - similarity *= self._sqrdexp(angle, torch.pi) - return similarity - - def _reward_pace(self): - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2]) - similarity = self._sqrdexp(angle, torch.pi) - # ! difference front left and hind left - angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3]) - similarity *= self._sqrdexp(angle, torch.pi) - # ! difference front left and hind left - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) - similarity *= self._sqrdexp(angle - torch.pi, torch.pi) - # ! difference front left and hind left - angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3]) - similarity *= self._sqrdexp(angle - torch.pi, torch.pi) - - return similarity - - def _reward_any_symm_gait(self): - rew_trot = self._reward_trot() - rew_pace = self._reward_pace() - rew_bound = self._reward_bound() - return torch.max(torch.max(rew_trot, rew_pace), rew_bound) - - def _reward_enc_pace(self): - return self._reward_pace() - - def _reward_bound(self): - # ! difference, front right and front left - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) - similarity = self._sqrdexp(angle, torch.pi) - # ! difference hind right and hind left - angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3]) - similarity *= self._sqrdexp(angle, torch.pi) - # ! difference right side - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2]) - similarity *= self._sqrdexp(angle - torch.pi, torch.pi) - # ! difference right side - angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3]) - similarity *= self._sqrdexp(angle - torch.pi, torch.pi) - return similarity - - def _reward_asymettric(self): - # ! hind legs - angle = self.angle_difference(self.oscillators[:, 2], self.oscillators[:, 3]) - similarity = 1 - self._sqrdexp(angle, torch.pi) - similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) - # ! difference, left legs - angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 3]) - similarity *= 1 - self._sqrdexp(angle, torch.pi) - similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) - # ! difference right legs - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 2]) - similarity *= 1 - self._sqrdexp(angle, torch.pi) - similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) - # ! front legs - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 1]) - similarity *= 1 - self._sqrdexp(angle, torch.pi) - similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) - # ! diagonal FR - angle = self.angle_difference(self.oscillators[:, 0], self.oscillators[:, 3]) - similarity *= 1 - self._sqrdexp(angle, torch.pi) - similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) - # ! diagonal FL - angle = self.angle_difference(self.oscillators[:, 1], self.oscillators[:, 2]) - similarity *= 1 - self._sqrdexp(angle, torch.pi) - similarity *= 1 - self._sqrdexp((torch.pi - angle), torch.pi) - return similarity - def _reward_tracking_height(self): """Reward for base height.""" # error between current and commanded height @@ -524,13 +314,3 @@ def _reward_tracking_height(self): error /= self.scales["base_height"] return self._sqrdexp(error) - - # def _compute_torques(self): - # torques = ( - # self.p_gains * (self.dof_pos_target + self.default_dof_pos - self.dof_pos) - # + self.d_gains * (self.dof_vel_target - self.dof_vel) - # + self.tau_ff - # ) - - # # disable torque limits - # return torques diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py index fcd92067..617bf086 100644 --- a/gym/envs/horse/horse_osc_config.py +++ b/gym/envs/horse/horse_osc_config.py @@ -112,7 +112,6 @@ class osc: # <-------------------most likely needs tuning coupling = 1 # gets overwritten osc_bool = False # not used in paper grf_bool = False # not used in paper - randomize_osc_params = False omega_range = [1.0, 4.0] # [0.0, 10.] coupling_range = [0.0, 1.0] offset_range = [0.0, 0.0] @@ -121,7 +120,6 @@ class osc: # <-------------------most likely needs tuning omega_step = 2.0 omega_slope = 1.0 omega_max = 4.0 - omega_var = 0.25 # coupling_step = 0. # coupling_stop = 0. coupling_stop = 4.0 @@ -129,10 +127,8 @@ class osc: # <-------------------most likely needs tuning coupling_slope = 0.0 coupling_max = 1.0 offset = 1.0 - coupling_var = 0.25 init_to = "random" - init_w_offset = True class commands: resampling_time = 3.0 # * time before command are changed[s] diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc.py b/gym/envs/mini_cheetah/mini_cheetah_osc.py index 77866ca3..f2b64409 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc.py @@ -1,5 +1,4 @@ import torch -import numpy as np from isaacgym.torch_utils import torch_rand_float from gym.envs.mini_cheetah.mini_cheetah import MiniCheetah @@ -129,31 +128,6 @@ def compute_osc_slope(self): self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) - def _process_rigid_body_props(self, props, env_id): - if env_id == 0: - # * init buffers for the domain rand changes - self.mass = torch.zeros(self.num_envs, 1, device=self.device) - self.com = torch.zeros(self.num_envs, 3, device=self.device) - - # * randomize mass - if self.cfg.domain_rand.randomize_base_mass: - lower = self.cfg.domain_rand.lower_mass_offset - upper = self.cfg.domain_rand.upper_mass_offset - # self.mass_ - props[0].mass += np.random.uniform(lower, upper) - self.mass[env_id] = props[0].mass - # * randomize com position - lower = self.cfg.domain_rand.lower_z_offset - upper = self.cfg.domain_rand.upper_z_offset - props[0].com.z += np.random.uniform(lower, upper) - self.com[env_id, 2] = props[0].com.z - - lower = self.cfg.domain_rand.lower_x_offset - upper = self.cfg.domain_rand.upper_x_offset - props[0].com.x += np.random.uniform(lower, upper) - self.com[env_id, 0] = props[0].com.x - return props - def _post_decimation_step(self): """Update all states that are not handled in PhysX""" super()._post_decimation_step() @@ -174,19 +148,13 @@ def _step_oscillators(self, dt=None): ) grf = self._compute_grf() self.oscillators_vel = self.osc_omega - grf * local_feedback - # self.oscillators_vel *= torch_rand_float(0.9, - # 1.1, - # self.oscillators_vel.shape, - # self.device) self.oscillators_vel += ( torch.randn(self.oscillators_vel.shape, device=self.device) * self.cfg.osc.process_noise_std ) self.oscillators_vel *= 2 * torch.pi - self.oscillators += ( - self.oscillators_vel * dt - ) # torch.clamp(self.oscillators_vel * dt, min=0) + self.oscillators += self.oscillators_vel * dt self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) self.oscillator_obs = torch.cat( (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 @@ -214,17 +182,6 @@ def _resample_commands(self, env_ids): torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var ) - # possible_commands = torch.tensor(self.command_ranges["lin_vel_y"], - # device=self.device) - # self.commands[env_ids, 1:2] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - # possible_commands = torch.tensor(self.command_ranges["yaw_vel"], - # device=self.device) - # self.commands[env_ids, 0:1] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - if 0 in self.cfg.commands.ranges.lin_vel_x: # * with 20% chance, reset to 0 commands except for forward self.commands[env_ids, 1:] *= ( From b9401ac6d309499cdb985781b2e27dc4684449de Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Thu, 20 Nov 2025 22:59:10 -0800 Subject: [PATCH 29/35] whoops still need randomize_osc_params --- gym/envs/horse/horse_osc_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py index 617bf086..b8d14e85 100644 --- a/gym/envs/horse/horse_osc_config.py +++ b/gym/envs/horse/horse_osc_config.py @@ -112,6 +112,7 @@ class osc: # <-------------------most likely needs tuning coupling = 1 # gets overwritten osc_bool = False # not used in paper grf_bool = False # not used in paper + randomize_osc_params = False omega_range = [1.0, 4.0] # [0.0, 10.] coupling_range = [0.0, 1.0] offset_range = [0.0, 0.0] From 80f05d044afffb9884f81bcad77b337ef88e9949 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Sun, 7 Dec 2025 23:14:54 -0800 Subject: [PATCH 30/35] horse_tweaks branch + tuning --- gym/envs/horse/horse_osc_config.py | 131 ++++++++++++++--------------- 1 file changed, 62 insertions(+), 69 deletions(-) diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py index b8d14e85..b2ad1b18 100644 --- a/gym/envs/horse/horse_osc_config.py +++ b/gym/envs/horse/horse_osc_config.py @@ -7,16 +7,10 @@ class HorseOscCfg(HorseCfg): - class viewer: - ref_env = 0 - pos = [-2.0, 3, 2] # [m] - lookat = [0.0, 1.0, 0.5] # [m] - class env(HorseCfg.env): num_envs = 2**12 num_actuators = 1 + 4 * 5 episode_length_s = 10 - env_spacing = 3.0 class terrain(HorseCfg.terrain): mesh_type = "plane" @@ -42,20 +36,24 @@ class init_state(HorseCfg.init_state): # * initialization for random range setup # these are the physical limits in the URDF as of 17 Nov 2025 # dof_pos_range = { - # "haa": [-0.2, 0.2], - # "hfe": [-0.7, 0.6], - # "kfe": [-1.3, 0.1], - # "pfe": [-0.3, 2.2], - # "pastern_to_foot": [-0.3, 1.8], - # "base_joint": [-0.2, 0.2], + # haa": [-0.2, 0.2], + # f_hfe": [-0.7, 0.6], + # h_hfe": [-0.7, 1.5], + # f_kfe": [-1.3, 0.1], + # h_kfe": [-0.2, 0.8], + # f_pfe": [-0.3, 2.2], + # h_pfe": [-0.3, 2.5], + # f_pastern_to_foot": [-0.3, 1.8], + # h_pastern_to_foot": [-0.3, 1.8], + # base_joint": [-0.2, 0.2], # } dof_pos_range = { "haa": [-0.2, 0.2], "hfe": [-0.7, 0.6], - "kfe": [-1.3, 0.1], + "kfe": [-0.2, 0.1], "pfe": [-0.3, 2.2], "pastern_to_foot": [-0.3, 1.8], - "base_joint": [-0.2, 0.2], + "base_joint": [-0.0, 0.0], } dof_vel_range = { "haa": [-0.2, 0.2], @@ -69,15 +67,15 @@ class init_state(HorseCfg.init_state): root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [1.3, 1.3], # z - [-0.2, 0.2], # roll - [-0.2, 0.2], # pitch + [1.35, 1.35], # z + [-0.0, 0.0], # roll + [-0.0, 0.0], # pitch [-0.2, 0.2], # yaw ] root_vel_range = [ - [-0.5, 5.0], # x + [0.0, 0.0], # x [0.0, 0.0], # y - [-0.05, 0.05], # z + [0.0, 0.0], # z [0.0, 0.0], # roll [0.0, 0.0], # pitch [0.0, 0.0], # yaw @@ -85,24 +83,25 @@ class init_state(HorseCfg.init_state): class control(HorseCfg.control): # * PD Drive parameters: + # HorseOscCfg.control stiffness = { - "haa": 4000, - "hfe": 4000, - "kfe": 4000, - "pfe": 4000, - "pastern_to_foot": 4000, - "base_joint": 50, + "haa": 75, + "hfe": 40, + "kfe": 38, + "pfe": 10, + "pastern_to_foot": 5, + "base_joint": 50, # still needs modifying } damping = { - "haa": 250, - "hfe": 250, - "kfe": 250, - "pfe": 250, - "pastern_to_foot": 250, + "haa": 0.0001, + "hfe": 0.0001, + "kfe": 0.0001, + "pfe": 0.5, + "pastern_to_foot": 0.5, "base_joint": 10, } ctrl_frequency = 250 # how often the PDF controller/action updates run - desired_sim_frequency = 500 # how often the physics is calculated + desired_sim_frequency = 1000 # how often the physics is calculated class osc: # <-------------------most likely needs tuning process_noise_std = 0.25 @@ -136,7 +135,7 @@ class commands: var = 1.0 class ranges: - lin_vel_x = [-1.0, 0.0, 1.0, 3.0] # min max [m/s] + lin_vel_x = [-1.0, 0.0, 1.0, 3.0, 6.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] yaw_vel = 6 # max [rad/s] height = [0.61, 1.30] # m @@ -163,7 +162,7 @@ class asset(HorseCfg.asset): file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" foot_name = "foot" penalize_contacts_on = ["thigh", "shank", "pastern"] - terminate_after_contacts_on = ["base"] + terminate_after_contacts_on = ["base"] # turn off for laying down collapse_fixed_joints = False fix_base_link = False self_collisions = 1 @@ -176,7 +175,7 @@ class reward_settings(HorseCfg.reward_settings): soft_dof_pos_limit = 0.9 soft_dof_vel_limit = 0.9 soft_torque_limit = 0.9 - max_contact_force = 600.0 + max_contact_force = 2000.0 # testing, this was too low for a horse weight base_height_target = BASE_HEIGHT_REF + 0.03 tracking_sigma = 0.25 switch_scale = 0.5 @@ -201,16 +200,6 @@ class actor(HorseRunnerCfg.actor): hidden_dims = [256, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" - # obs = [ - # "base_height", - # "base_lin_vel", - # "base_ang_vel", - # "projected_gravity", - # "commands", - # "dof_pos_obs", - # "dof_vel", - # "dof_pos_target", - # ] obs = [ "base_height", "base_lin_vel", @@ -264,52 +253,56 @@ class weights: tracking_lin_vel = 4.0 tracking_ang_vel = 2.0 lin_vel_z = 0.0 - ang_vel_xy = 0.0 + ang_vel_xy = 0.01 orientation = 1.0 - torques = 5.0e-6 + torques = 5.0e-10 dof_vel = 0.0 - min_base_height = 1.0 - collision = 0 - action_rate = 0.1 # -0.01 - action_rate2 = 0.01 # -0.001 + # min_base_height = 1.0 + action_rate = 1e-5 + action_rate2 = 1e-6 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 dof_near_home = 0.0 - swing_grf = 1.0 - stance_grf = 1.0 + swing_grf = 5.0 + stance_grf = 5.0 swing_velocity = 0.0 stance_velocity = 0.0 - coupled_grf = 0.0 # 8. + coupled_grf = 1.0 # penalize for grf during swing, no grf during stance enc_pace = 0.0 - cursorial = 0.25 + cursorial = 0.75 # enourage legs to stay under body, don't splay out standing_torques = 0.0 # 1.e-5 + tracking_height = 1.5 class termination_weight: termination = 15.0 / 100.0 class algorithm: - # training params - value_loss_coef = 1.0 - use_clipped_value_loss = True + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor entropy_coef = 0.01 - num_learning_epochs = 4 - # mini batch size = num_envs*nsteps/nminibatches - num_mini_batches = 8 - max_gradient_steps = 32 - learning_rate = 1.0e-4 - schedule = "adaptive" # can be adaptive, fixed - discount_horizon = 1.0 - GAE_bootstrap_horizon = 2.0 + schedule = "adaptive" # could be adaptive, fixed desired_kl = 0.01 - max_grad_norm = 1.0 - lr_range = [1e-5, 5e-3] - lr_ratio = 1.5 + lr_range = [2e-4, 1e-2] + lr_ratio = 1.3 class runner(HorseRunnerCfg.runner): run_name = "" experiment_name = "horse_osc" - max_iterations = 500 + max_iterations = 1000 algorithm_class_name = "PPO2" num_steps_per_env = 32 From 91f2e7c5409ab02ae5c5fca4bc9e9d8e805f5d11 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Thu, 11 Dec 2025 01:19:35 -0800 Subject: [PATCH 31/35] expand joint limits for lay down motion --- gym/envs/horse/horse_osc_config.py | 18 +++++++-------- resources/robots/horse/urdf/horse.urdf | 32 +++++++++++++------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py index b2ad1b18..95427000 100644 --- a/gym/envs/horse/horse_osc_config.py +++ b/gym/envs/horse/horse_osc_config.py @@ -34,24 +34,24 @@ class init_state(HorseCfg.init_state): } # * initialization for random range setup - # these are the physical limits in the URDF as of 17 Nov 2025 + # these are the physical limits in the URDF as of 11 Dec 2025 # dof_pos_range = { # haa": [-0.2, 0.2], - # f_hfe": [-0.7, 0.6], - # h_hfe": [-0.7, 1.5], - # f_kfe": [-1.3, 0.1], - # h_kfe": [-0.2, 0.8], - # f_pfe": [-0.3, 2.2], - # h_pfe": [-0.3, 2.5], + # f_hfe": [-1.0, 0.6], + # h_hfe": [-1.5, 0.5], + # f_kfe": [-1.5, 0.1], + # h_kfe": [-0.2, 1.0], + # f_pfe": [-0.3, 3.0], + # h_pfe": [-1.2, 2.5], # f_pastern_to_foot": [-0.3, 1.8], # h_pastern_to_foot": [-0.3, 1.8], # base_joint": [-0.2, 0.2], # } dof_pos_range = { "haa": [-0.2, 0.2], - "hfe": [-0.7, 0.6], + "hfe": [-0.7, 0.5], "kfe": [-0.2, 0.1], - "pfe": [-0.3, 2.2], + "pfe": [-0.3, 1.0], "pastern_to_foot": [-0.3, 1.8], "base_joint": [-0.0, 0.0], } diff --git a/resources/robots/horse/urdf/horse.urdf b/resources/robots/horse/urdf/horse.urdf index 224bb330..a756bbfb 100644 --- a/resources/robots/horse/urdf/horse.urdf +++ b/resources/robots/horse/urdf/horse.urdf @@ -55,7 +55,7 @@ - + @@ -80,7 +80,7 @@ - + @@ -107,7 +107,7 @@ - + @@ -134,7 +134,7 @@ - + @@ -186,7 +186,7 @@ - + @@ -211,7 +211,7 @@ - + @@ -238,7 +238,7 @@ - + @@ -265,7 +265,7 @@ - + @@ -317,7 +317,7 @@ - + @@ -341,7 +341,7 @@ - + @@ -368,7 +368,7 @@ - + @@ -395,7 +395,7 @@ - + @@ -447,7 +447,7 @@ - + @@ -471,7 +471,7 @@ - + @@ -498,7 +498,7 @@ - + @@ -525,7 +525,7 @@ - + From 1f547c4edcdc24adc49524e099d7d0e8ed323dbf Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Thu, 11 Dec 2025 01:25:18 -0800 Subject: [PATCH 32/35] logs_by_joint new limits --- scripts/plot_logs_by_joint.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/plot_logs_by_joint.py b/scripts/plot_logs_by_joint.py index f20bd9c7..a2952bf2 100644 --- a/scripts/plot_logs_by_joint.py +++ b/scripts/plot_logs_by_joint.py @@ -14,12 +14,12 @@ # joint limits JOINT_LIMITS = { r".*haa": [-0.2, 0.2], - r".*f_hfe": [-0.7, 0.6], - r".*h_hfe": [-0.7, 1.5], - r".*f_kfe": [-1.3, 0.1], - r".*h_kfe": [-0.2, 0.8], - r".*f_pfe": [-0.3, 2.2], - r".*h_pfe": [-0.3, 2.5], + r".*f_hfe": [-1.0, 0.6], + r".*h_hfe": [-1.5, 0.5], + r".*f_kfe": [-1.5, 0.1], + r".*h_kfe": [-0.2, 1.0], + r".*f_pfe": [-0.3, 3.0], + r".*h_pfe": [-1.2, 2.5], r".*f_pastern_to_foot": [-0.3, 1.8], r".*h_pastern_to_foot": [-0.3, 1.8], r".*base_joint": [-0.2, 0.2], From 935b4d0defe9113ab59514b0a1d4882c5134661b Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Fri, 30 Jan 2026 00:35:31 -0800 Subject: [PATCH 33/35] add tendon constraints --- gym/envs/horse/horse_osc.py | 208 ++++++++++++++++++++-- gym/envs/horse/horse_osc_config.py | 20 ++- gym/utils/interfaces/KeyboardInterface.py | 4 +- resources/robots/horse/urdf/horse.urdf | 32 ++-- scripts/play.py | 19 ++ 5 files changed, 247 insertions(+), 36 deletions(-) diff --git a/gym/envs/horse/horse_osc.py b/gym/envs/horse/horse_osc.py index 3a8c73fb..e5ed572c 100644 --- a/gym/envs/horse/horse_osc.py +++ b/gym/envs/horse/horse_osc.py @@ -6,6 +6,7 @@ from isaacgym import gymtorch HORSE_WEIGHT = 536.38 * 9.81 # Weight of horse in Newtons +BASE_HEIGHT_REF = 1.3 class HorseOsc(Horse): @@ -14,6 +15,99 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def _init_buffers(self): super()._init_buffers() + + BASE = 0 + + RH = dict(haa=1, hfe=2, kfe=3, pfe=4, pastern=5) + LH = dict(haa=6, hfe=7, kfe=8, pfe=9, pastern=10) + RF = dict(haa=11, hfe=12, kfe=13, pfe=14, pastern=15) + LF = dict(haa=16, hfe=17, kfe=18, pfe=19, pastern=20) + + # tensors for vectorized ops + self.idx = { + "base": torch.tensor([BASE], device=self.device), + # all legs by joint type (RH, LH, RF, LF order) + "haa": torch.tensor( + [RH["haa"], LH["haa"], RF["haa"], LF["haa"]], device=self.device + ), + "hfe": torch.tensor( + [RH["hfe"], LH["hfe"], RF["hfe"], LF["hfe"]], device=self.device + ), + "kfe": torch.tensor( + [RH["kfe"], LH["kfe"], RF["kfe"], LF["kfe"]], device=self.device + ), + "pfe": torch.tensor( + [RH["pfe"], LH["pfe"], RF["pfe"], LF["pfe"]], device=self.device + ), + "pastern": torch.tensor( + [RH["pastern"], LH["pastern"], RF["pastern"], LF["pastern"]], + device=self.device, + ), + # hind vs front splits (HIND = RH, LH / FRONT = RF, LF) + "hind_haa": torch.tensor([RH["haa"], LH["haa"]], device=self.device), + "hind_hfe": torch.tensor([RH["hfe"], LH["hfe"]], device=self.device), + "hind_kfe": torch.tensor([RH["kfe"], LH["kfe"]], device=self.device), + "hind_pfe": torch.tensor([RH["pfe"], LH["pfe"]], device=self.device), + "hind_pastern": torch.tensor( + [RH["pastern"], LH["pastern"]], device=self.device + ), + "front_haa": torch.tensor([RF["haa"], LF["haa"]], device=self.device), + "front_hfe": torch.tensor([RF["hfe"], LF["hfe"]], device=self.device), + "front_kfe": torch.tensor([RF["kfe"], LF["kfe"]], device=self.device), + "front_pfe": torch.tensor([RF["pfe"], LF["pfe"]], device=self.device), + "front_pastern": torch.tensor( + [RF["pastern"], LF["pastern"]], device=self.device + ), + # legs (all joints within each leg) + "rh_leg": torch.tensor( + [RH["haa"], RH["hfe"], RH["kfe"], RH["pfe"], RH["pastern"]], + device=self.device, + ), + "lh_leg": torch.tensor( + [LH["haa"], LH["hfe"], LH["kfe"], LH["pfe"], LH["pastern"]], + device=self.device, + ), + "rf_leg": torch.tensor( + [RF["haa"], RF["hfe"], RF["kfe"], RF["pfe"], RF["pastern"]], + device=self.device, + ), + "lf_leg": torch.tensor( + [LF["haa"], LF["hfe"], LF["kfe"], LF["pfe"], LF["pastern"]], + device=self.device, + ), + # group by front or hind legs + "hind_legs": torch.tensor( + [ + RH["haa"], + RH["hfe"], + RH["kfe"], + RH["pfe"], + RH["pastern"], + LH["haa"], + LH["hfe"], + LH["kfe"], + LH["pfe"], + LH["pastern"], + ], + device=self.device, + ), + "front_legs": torch.tensor( + [ + RF["haa"], + RF["hfe"], + RF["kfe"], + RF["pfe"], + RF["pastern"], + LF["haa"], + LF["hfe"], + LF["kfe"], + LF["pfe"], + LF["pastern"], + ], + device=self.device, + ), + } + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) @@ -29,6 +123,8 @@ def _init_buffers(self): self.num_envs, 1, device=self.device ) + self.commands[:, 3] = BASE_HEIGHT_REF + def _reset_oscillators(self, env_ids): if len(env_ids) == 0: return @@ -48,7 +144,6 @@ def _reset_system(self, env_ids): if len(env_ids) == 0: return self._reset_oscillators(env_ids) - self.oscillator_obs = torch.cat( (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 ) @@ -63,6 +158,7 @@ def _reset_system(self, env_ids): if len(env_ids) == 0: return super()._reset_system(env_ids) + self.commands[env_ids, 3] = BASE_HEIGHT_REF def _pre_decimation_step(self): super()._pre_decimation_step() @@ -164,6 +260,7 @@ def _resample_commands(self, env_ids): if self.cfg.osc.randomize_osc_params: self._resample_osc_params(env_ids) + self.commands[env_ids, 3] = BASE_HEIGHT_REF def _resample_osc_params(self, env_ids): if len(env_ids) > 0: @@ -225,15 +322,8 @@ def _reward_cursorial(self): we penalize (haa, hfe, kfe) for each of the 4 legs. Distal joint (pfe, pastern_to_foot) naturally flex a lot, shock absorbers """ - # indices of major joints per leg (haa,hfe,kfe) - major_joint_idx = torch.tensor( - [1, 2, 3, 6, 7, 8, 11, 12, 13, 16, 17, 18], device=self.device - ) - - major_joints = self.dof_pos[:, major_joint_idx] - scaled = major_joints / self.scales["dof_pos"][0] # normalize - - return -torch.mean(torch.square(scaled), dim=1) + legs = torch.tensor(list(range(1, 21)), device=self.device) + return -torch.mean(torch.square(self.dof_pos[:, legs]), dim=1) def _reward_swing_grf(self): # Reward non-zero grf during swing (0 to pi) @@ -314,3 +404,101 @@ def _reward_tracking_height(self): error /= self.scales["base_height"] return self._sqrdexp(error) + + def _reward_tendon_constraints(self): + """ + Tendon-like coupling constraints (front/hind split): + - Desired KFE/PFE become dependant on HFE. + - Penalize deviation from those desired values. + """ + + # --- helpers --- + def lerp(x, x0, x1, y0, y1): + """Linear interpolation y(x) between (x0,y0)->(x1,y1), with clamping.""" + # handle degenerate interval + denom = x1 - x0 + denom = torch.where(torch.abs(denom) < 1e-8, torch.ones_like(denom), denom) + t = (x - x0) / denom + t = torch.clamp(t, 0.0, 1.0) + return y0 + t * (y1 - y0) + + def piecewise_2seg(x, mid, x_lo, x_hi, y_lo, y_mid, y_hi): + """ + Piecewise linear passing through: + (x=mid, y=y_mid) + and reaching: + (x=x_lo, y=y_lo) for x <= mid + (x=x_hi, y=y_hi) for x >= mid + """ + y_left = lerp( + x, mid, x_lo, y_mid, y_lo + ) # x in [x_lo, mid] (or beyond -> clamped) + y_right = lerp( + x, mid, x_hi, y_mid, y_hi + ) # x in [mid, x_hi] (or beyond -> clamped) + return torch.where(x <= mid, y_left, y_right) + + # Hind constraints (RH, LH) + hfe_hind = self.dof_pos[:, self.idx["hind_hfe"]] # [N,2] + kfe_hind = self.dof_pos[:, self.idx["hind_kfe"]] + pfe_hind = self.dof_pos[:, self.idx["hind_pfe"]] + + # 3) hind hfe: 0 -> -1.5 => kfe: 0 -> +1.0, pfe: 0 -> -1.2 + # 4) hind hfe: 0 -> +0.5 => kfe: 0 -> -0.2, pfe: 0 -> -0.5 + kfe_hind_des = piecewise_2seg( + hfe_hind, + mid=torch.zeros_like(hfe_hind), # at hfe=0 + x_lo=-1.5 * torch.ones_like(hfe_hind), + x_hi=+0.5 * torch.ones_like(hfe_hind), + y_lo=+1.0 * torch.ones_like(hfe_hind), + y_mid=0.0 * torch.ones_like(hfe_hind), + y_hi=-0.2 * torch.ones_like(hfe_hind), + ) + + pfe_hind_des = piecewise_2seg( + hfe_hind, + mid=torch.zeros_like(hfe_hind), + x_lo=-1.5 * torch.ones_like(hfe_hind), + x_hi=+0.5 * torch.ones_like(hfe_hind), + y_lo=-1.2 * torch.ones_like(hfe_hind), + y_mid=0.0 * torch.ones_like(hfe_hind), + y_hi=-0.5 * torch.ones_like(hfe_hind), + ) + + # Front constraints (RF, LF) + hfe_front = self.dof_pos[:, self.idx["front_hfe"]] # [N,2] + kfe_front = self.dof_pos[:, self.idx["front_kfe"]] + pfe_front = self.dof_pos[:, self.idx["front_pfe"]] + + # 5) front hfe: 0 -> +0.6 => kfe: 0 -> +0.1, pfe: 0 -> -0.3 + # 6) front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +3.0 + kfe_front_des = piecewise_2seg( + hfe_front, + mid=torch.zeros_like(hfe_front), + x_lo=-1.0 * torch.ones_like(hfe_front), + x_hi=+0.6 * torch.ones_like(hfe_front), + y_lo=-1.5 * torch.ones_like(hfe_front), + y_mid=0.0 * torch.ones_like(hfe_front), + y_hi=+0.1 * torch.ones_like(hfe_front), + ) + + pfe_front_des = piecewise_2seg( + hfe_front, + mid=torch.zeros_like(hfe_front), + x_lo=-1.0 * torch.ones_like(hfe_front), + x_hi=+0.6 * torch.ones_like(hfe_front), + y_lo=+3.0 * torch.ones_like(hfe_front), + y_mid=0.0 * torch.ones_like(hfe_front), + y_hi=-0.3 * torch.ones_like(hfe_front), + ) + + # penalty (soft “tendon coupling”) + # squared error around desired coupling curve + hind_pen = (kfe_hind - kfe_hind_des) ** 2 + (pfe_hind - pfe_hind_des) ** 2 + front_pen = (kfe_front - kfe_front_des) ** 2 + (pfe_front - pfe_front_des) ** 2 + + # mean over joints (2 legs) then return per-env reward + pen = torch.mean(hind_pen, dim=1) + torch.mean(front_pen, dim=1) + + # reward is negative penalty + return -pen diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py index 95427000..5cb0ccec 100644 --- a/gym/envs/horse/horse_osc_config.py +++ b/gym/envs/horse/horse_osc_config.py @@ -49,9 +49,9 @@ class init_state(HorseCfg.init_state): # } dof_pos_range = { "haa": [-0.2, 0.2], - "hfe": [-0.7, 0.5], + "hfe": [-1.0, 0.5], "kfe": [-0.2, 0.1], - "pfe": [-0.3, 1.0], + "pfe": [-0.3, 2.5], "pastern_to_foot": [-0.3, 1.8], "base_joint": [-0.0, 0.0], } @@ -161,8 +161,8 @@ class asset(HorseCfg.asset): shank_length_diff = 0 # Units in cm file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" foot_name = "foot" - penalize_contacts_on = ["thigh", "shank", "pastern"] - terminate_after_contacts_on = ["base"] # turn off for laying down + penalize_contacts_on = ["thigh"] # "thigh", "shank", "pastern" + terminate_after_contacts_on = ["thigh"] collapse_fixed_joints = False fix_base_link = False self_collisions = 1 @@ -185,9 +185,12 @@ class scaling(HorseCfg.scaling): base_lin_vel = BASE_HEIGHT_REF dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) base_height = BASE_HEIGHT_REF - dof_pos = [0.2] + (4 * [0.4, 1.3, 1.4, 2.5, 2.1]) + dof_pos = [0.2] + ( + 4 * [0.4, 1.3, 1.4, 2.5, 2.1] + ) # reducing this to be alot smaller 2.1 or 2.5 dof_pos_obs = dof_pos - dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + # dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + dof_pos_target = [0.4] + (4 * [0.8, 2.6, 2.8, 5.0, 4.2]) tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used commands = [3, 1, 3, 1] # add height as a command @@ -270,9 +273,10 @@ class weights: stance_velocity = 0.0 coupled_grf = 1.0 # penalize for grf during swing, no grf during stance enc_pace = 0.0 - cursorial = 0.75 # enourage legs to stay under body, don't splay out + cursorial = 1.0 # enourage legs to stay under body, don't splay out standing_torques = 0.0 # 1.e-5 - tracking_height = 1.5 + tracking_height = 1.0 + tendon_constraints = 1.0 class termination_weight: termination = 15.0 / 100.0 diff --git a/gym/utils/interfaces/KeyboardInterface.py b/gym/utils/interfaces/KeyboardInterface.py index 9607c835..ce740f50 100644 --- a/gym/utils/interfaces/KeyboardInterface.py +++ b/gym/utils/interfaces/KeyboardInterface.py @@ -44,7 +44,7 @@ def __init__(self, env): self.max_vel_yaw = 2.0 self.increment_yaw = self.max_vel_yaw * 0.2 - self.min_height = 0.6 + self.min_height = -1000 self.max_height = 1.3 self.increment_height = 0.1 @@ -90,7 +90,7 @@ def update(self, env): elif evt.action == "height_down": env.commands[:, 3] = torch.clamp( env.commands[:, 3] - self.increment_height, - max=self.min_height, + min=self.min_height, ) elif evt.action == "QUIT": env.exit = True diff --git a/resources/robots/horse/urdf/horse.urdf b/resources/robots/horse/urdf/horse.urdf index a756bbfb..56e5443f 100644 --- a/resources/robots/horse/urdf/horse.urdf +++ b/resources/robots/horse/urdf/horse.urdf @@ -86,7 +86,7 @@ - + @@ -113,7 +113,7 @@ - + @@ -152,7 +152,7 @@ - + @@ -179,7 +179,7 @@ - + @@ -217,7 +217,7 @@ - + @@ -244,7 +244,7 @@ - + @@ -283,7 +283,7 @@ - + @@ -310,7 +310,7 @@ - + @@ -347,7 +347,7 @@ - + @@ -374,7 +374,7 @@ - + @@ -413,7 +413,7 @@ - + @@ -440,7 +440,7 @@ - + @@ -477,7 +477,7 @@ - + @@ -504,7 +504,7 @@ - + @@ -543,7 +543,7 @@ - + @@ -570,7 +570,7 @@ - + \ No newline at end of file diff --git a/scripts/play.py b/scripts/play.py index 7b6f8d98..48347ba6 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -7,6 +7,8 @@ import torch import numpy as np +BASE_HEIGHT_REF = 1.3 + def setup(args): env_cfg, train_cfg = task_registry.create_cfgs(args) @@ -101,6 +103,8 @@ def play(env, runner, train_cfg): # track actual number of simulation steps actual_steps = 0 + # track and print commanded height changes + last_height_cmd = None # * set up recording if env.cfg.viewer.record: @@ -113,6 +117,7 @@ def play(env, runner, train_cfg): if COMMANDS_INTERFACE: # interface = GamepadInterface(env) interface = KeyboardInterface(env) + env.commands[:, 3] = BASE_HEIGHT_REF try: for i in range(10 * int(env.max_episode_length)): @@ -121,6 +126,20 @@ def play(env, runner, train_cfg): if env.cfg.viewer.record: recorder.update(i) + # print target/actual height (m) + target_height = env.commands[0, 3].item() + if (last_height_cmd is None) or ( + abs(target_height - last_height_cmd) > 1e-6 + ): + actual_height = env.base_height[0].item() + + print( + f"[HEIGHT] actual = {actual_height:.3f} m | " + f"target = {target_height:.3f} m | " + f"error = {actual_height - target_height:.3f}" + ) + last_height_cmd = target_height + runner.set_actions( runner.actor_cfg["actions"], runner.get_inference_actions(), From 70cfb20634bf6cf3bbc4702482f3a38465edb056 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Fri, 6 Feb 2026 02:07:32 -0800 Subject: [PATCH 34/35] attempt at smoothing descent rewards --- gym/envs/horse/horse_osc.py | 71 +++++++++++++++++++++++ gym/envs/horse/horse_osc_config.py | 5 +- gym/utils/interfaces/KeyboardInterface.py | 2 +- scripts/play.py | 3 +- 4 files changed, 78 insertions(+), 3 deletions(-) diff --git a/gym/envs/horse/horse_osc.py b/gym/envs/horse/horse_osc.py index e5ed572c..ab960e7c 100644 --- a/gym/envs/horse/horse_osc.py +++ b/gym/envs/horse/horse_osc.py @@ -108,6 +108,11 @@ def _init_buffers(self): ), } + self.prev_height_error = torch.zeros(self.num_envs, device=self.device) + self.lie_down_done = torch.zeros( + self.num_envs, dtype=torch.bool, device=self.device + ) + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) @@ -159,6 +164,11 @@ def _reset_system(self, env_ids): return super()._reset_system(env_ids) self.commands[env_ids, 3] = BASE_HEIGHT_REF + if len(env_ids) > 0: + h_cmd = self.commands[env_ids, 3].flatten() + h_now = self.base_height[env_ids].flatten() + self.prev_height_error[env_ids] = torch.abs(h_now - h_cmd) + self.lie_down_done[env_ids] = False def _pre_decimation_step(self): super()._pre_decimation_step() @@ -502,3 +512,64 @@ def piecewise_2seg(x, mid, x_lo, x_hi, y_lo, y_mid, y_hi): # reward is negative penalty return -pen + + def _reward_feet_support_during_descent(self): + """ + Reward having support forces (GRF) on the feet during lie-down / descent + """ + # Lie-down is when commanded height is low + lie_cmd = self.commands[:, 3] # (num_envs,) + lie_mode = lie_cmd < 1.0 # bool (num_envs,) + + # Descending means base vertical velocity is negative (falling down) + descending = self.base_lin_vel[:, 2] < 0.0 + + # Only reward support when we're actually trying to go down + phase = (lie_mode & descending).float().unsqueeze(1) # (num_envs, 1) + + # Get per-foot GRF + grf = self._compute_grf(grf_norm=True) # (num_envs, 4), already 0–1 + + # contact = (grf > 0.1).float() + # rewards proportional load sharing + support = torch.mean(grf, dim=1) + + return phase.squeeze(1) * support + + def _reward_controlled_descent(self): + lie_cmd = self.commands[:, 3] + lie_mode = lie_cmd < 1.0 + + # Only while still above target height (avoid bounce farming near the ground) + above_target = self.base_height.flatten() > (lie_cmd + 0.05) + + # "done" after first settle/impact so reward doesn't apply during bounces + grf = self._compute_grf(grf_norm=True) # (N,4) 0..1 + support_all = (grf > 0.1).float().mean(dim=1) # (N,) + # impact_event: 4 feet making contact and height is 12 cm above height + impact_event = (support_all > 0.75) & ( + self.base_height.flatten() < lie_cmd + 0.12 + ) + # Once impact_event happens, lie_down_done stays True + self.lie_down_done |= impact_event + + # Reward only applies when: in lie-down command, still above the target, haven’t + # “completed” the lie-down yet. + active = lie_mode & above_target & (~self.lie_down_done) + + # penalize downward speed and upward bounce separately + vz = self.base_lin_vel[:, 2] + fall_speed = torch.clamp(-vz, min=0.0) # downward only + bounce_speed = torch.clamp(vz, min=0.0) # upward only + + # penalize force above 5000 (hard threshold), squared, averaged across feet + foot_F = torch.norm( + self.contact_forces[:, self.feet_indices, :], dim=-1 + ) # (N,4) + impact_pen = torch.mean(torch.clamp(foot_F - 5000.0, min=0.0) ** 2, dim=1) + + # Encourage small downward speed during the *first* descent, + # penalize bounce and impact spikes + reward = -(fall_speed**2) - 2.0 * (bounce_speed**2) - 2.0 * impact_pen + + return active.float() * reward diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py index 5cb0ccec..ac9777a1 100644 --- a/gym/envs/horse/horse_osc_config.py +++ b/gym/envs/horse/horse_osc_config.py @@ -20,7 +20,7 @@ class init_state(HorseCfg.init_state): timeout_reset_ratio = 0.75 reset_mode = "reset_to_range" # * default COM for basic initialization - pos = [0.0, 0.0, 1.4] # x,y,z [m] + pos = [0.0, 0.0, 1.35] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] @@ -277,6 +277,9 @@ class weights: standing_torques = 0.0 # 1.e-5 tracking_height = 1.0 tendon_constraints = 1.0 + feet_support_during_descent = 100.0 + controlled_descent = 100 + kneel_front_then_hind = 0 class termination_weight: termination = 15.0 / 100.0 diff --git a/gym/utils/interfaces/KeyboardInterface.py b/gym/utils/interfaces/KeyboardInterface.py index ce740f50..da7a8ac8 100644 --- a/gym/utils/interfaces/KeyboardInterface.py +++ b/gym/utils/interfaces/KeyboardInterface.py @@ -46,7 +46,7 @@ def __init__(self, env): self.min_height = -1000 self.max_height = 1.3 - self.increment_height = 0.1 + self.increment_height = 0.7 def update(self, env): for evt in env.gym.query_viewer_action_events(env.viewer): diff --git a/scripts/play.py b/scripts/play.py index 48347ba6..e589451d 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -136,7 +136,8 @@ def play(env, runner, train_cfg): print( f"[HEIGHT] actual = {actual_height:.3f} m | " f"target = {target_height:.3f} m | " - f"error = {actual_height - target_height:.3f}" + f"error = {actual_height - target_height:.3f} | " + f"descend mode = {(env.commands[:, 3][0]) < 1.0}" ) last_height_cmd = target_height From 1957e3f5fdcd221bfc547ace0ca1f2b524d6d337 Mon Sep 17 00:00:00 2001 From: angelayixuanli Date: Fri, 13 Feb 2026 02:10:34 -0800 Subject: [PATCH 35/35] plots and fix height pos and command --- gym/envs/horse/horse_osc.py | 300 ++++++++++++++++------ gym/envs/horse/horse_osc_config.py | 11 +- gym/utils/interfaces/KeyboardInterface.py | 2 +- resources/robots/horse/urdf/horse.urdf | 31 ++- scripts/play.py | 70 ++++- scripts/plot_height_rewards.py | 28 ++ 6 files changed, 357 insertions(+), 85 deletions(-) create mode 100644 scripts/plot_height_rewards.py diff --git a/gym/envs/horse/horse_osc.py b/gym/envs/horse/horse_osc.py index ab960e7c..5c0ef2c1 100644 --- a/gym/envs/horse/horse_osc.py +++ b/gym/envs/horse/horse_osc.py @@ -128,7 +128,15 @@ def _init_buffers(self): self.num_envs, 1, device=self.device ) - self.commands[:, 3] = BASE_HEIGHT_REF + env_ids = torch.arange(self.num_envs, device=self.device, dtype=torch.long) + + self._reset_to_custom_pos(env_ids, self.root_states[env_ids, 2].clone()) + + # resample height command + h_min, h_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = h_min + (h_max - h_min) * torch.rand( + (len(env_ids),), device=self.device + ) def _reset_oscillators(self, env_ids): if len(env_ids) == 0: @@ -163,7 +171,16 @@ def _reset_system(self, env_ids): if len(env_ids) == 0: return super()._reset_system(env_ids) - self.commands[env_ids, 3] = BASE_HEIGHT_REF + + # pose horse based set height + self._reset_to_custom_pos(env_ids, self.root_states[env_ids, 2].clone()) + + # resample height command + h_min, h_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = h_min + (h_max - h_min) * torch.rand( + (len(env_ids),), device=self.device + ) + if len(env_ids) > 0: h_cmd = self.commands[env_ids, 3].flatten() h_now = self.base_height[env_ids].flatten() @@ -270,7 +287,12 @@ def _resample_commands(self, env_ids): if self.cfg.osc.randomize_osc_params: self._resample_osc_params(env_ids) - self.commands[env_ids, 3] = BASE_HEIGHT_REF + + # resample height command + h_min, h_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = h_min + (h_max - h_min) * torch.rand( + (len(env_ids),), device=self.device + ) def _resample_osc_params(self, env_ids): if len(env_ids) > 0: @@ -448,6 +470,18 @@ def piecewise_2seg(x, mid, x_lo, x_hi, y_lo, y_mid, y_hi): ) # x in [mid, x_hi] (or beyond -> clamped) return torch.where(x <= mid, y_left, y_right) + # make desired match (N,2) like the actual joint tensor + def _match_legs(desired, actual_legs): + # actual_legs: (N,2) + if desired.dim() == 0: + return desired.view(1, 1).expand_as(actual_legs) + if desired.dim() == 1: + return desired.unsqueeze(1).expand_as(actual_legs) # (N,) -> (N,2) + if desired.dim() == 2: + # if already (N,2) it's good + return desired + raise RuntimeError(f"Unexpected desired shape: {tuple(desired.shape)}") + # Hind constraints (RH, LH) hfe_hind = self.dof_pos[:, self.idx["hind_hfe"]] # [N,2] kfe_hind = self.dof_pos[:, self.idx["hind_kfe"]] @@ -480,96 +514,212 @@ def piecewise_2seg(x, mid, x_lo, x_hi, y_lo, y_mid, y_hi): kfe_front = self.dof_pos[:, self.idx["front_kfe"]] pfe_front = self.dof_pos[:, self.idx["front_pfe"]] - # 5) front hfe: 0 -> +0.6 => kfe: 0 -> +0.1, pfe: 0 -> -0.3 - # 6) front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +3.0 + # 5) front hfe: 0 -> +0.6 => kfe: 0 -> 0, pfe: 0 -> 0 + # 6) front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +1.5 + # 7) front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +3.0 + + zeros = torch.zeros_like(hfe_front) + ones = torch.ones_like(hfe_front) + + # kfe: flat 0 for hfe in [0, +0.6], ramp to -1.5 for hfe in [-1.0, 0] kfe_front_des = piecewise_2seg( hfe_front, - mid=torch.zeros_like(hfe_front), - x_lo=-1.0 * torch.ones_like(hfe_front), - x_hi=+0.6 * torch.ones_like(hfe_front), - y_lo=-1.5 * torch.ones_like(hfe_front), - y_mid=0.0 * torch.ones_like(hfe_front), - y_hi=+0.1 * torch.ones_like(hfe_front), + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=-1.5 * ones, # at hfe=-1.0 + y_mid=0.0 * ones, # at hfe=0 + y_hi=0.0 * ones, # at hfe=+0.6 ) - pfe_front_des = piecewise_2seg( + # pfe: flat 0 for hfe in [0, +0.6], ramp to +1.5 for hfe in [-1.0, 0] + pfe_front_des_15 = piecewise_2seg( hfe_front, - mid=torch.zeros_like(hfe_front), - x_lo=-1.0 * torch.ones_like(hfe_front), - x_hi=+0.6 * torch.ones_like(hfe_front), - y_lo=+3.0 * torch.ones_like(hfe_front), - y_mid=0.0 * torch.ones_like(hfe_front), - y_hi=-0.3 * torch.ones_like(hfe_front), + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=+1.5 * ones, # at hfe=-1.0 + y_mid=0.0 * ones, # at hfe=0 + y_hi=0.0 * ones, # at hfe=+0.6 ) - # penalty (soft “tendon coupling”) - # squared error around desired coupling curve - hind_pen = (kfe_hind - kfe_hind_des) ** 2 + (pfe_hind - pfe_hind_des) ** 2 - front_pen = (kfe_front - kfe_front_des) ** 2 + (pfe_front - pfe_front_des) ** 2 - - # mean over joints (2 legs) then return per-env reward - pen = torch.mean(hind_pen, dim=1) + torch.mean(front_pen, dim=1) - - # reward is negative penalty - return -pen + # pfe: flat 0 for hfe in [0, +0.6], ramp to +3.0 for hfe in [-1.0, 0] + pfe_front_des_30 = piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=+3.0 * ones, # at hfe=-1.0 + y_mid=0.0 * ones, # at hfe=0 + y_hi=0.0 * ones, # at hfe=+0.6 + ) - def _reward_feet_support_during_descent(self): - """ - Reward having support forces (GRF) on the feet during lie-down / descent - """ - # Lie-down is when commanded height is low - lie_cmd = self.commands[:, 3] # (num_envs,) - lie_mode = lie_cmd < 1.0 # bool (num_envs,) + # --- HIND --- + kfe_hind_des_ = _match_legs(kfe_hind_des, kfe_hind) # -> (N,2) + pfe_hind_des_ = _match_legs(pfe_hind_des, pfe_hind) # -> (N,2) + hind_pen = (kfe_hind - kfe_hind_des_) ** 2 + ( + pfe_hind - pfe_hind_des_ + ) ** 2 # (N,2) - # Descending means base vertical velocity is negative (falling down) - descending = self.base_lin_vel[:, 2] < 0.0 + # --- FRONT --- + # KFE: flat 0 for hfe in [0, 0.6], ramp to -1.5 for hfe in [-1,0] + kfe_front_des_ = _match_legs(kfe_front_des, kfe_front) # -> (N,2) + kfe_err = (kfe_front - kfe_front_des_) ** 2 # (N,2) - # Only reward support when we're actually trying to go down - phase = (lie_mode & descending).float().unsqueeze(1) # (num_envs, 1) + # negative HFE: + # - PFE: 0 -> +1.5 (hfe 0 -> -1) + # - PFE: 0 -> +3.0 (hfe 0 -> -1) + pfe_front_des15_ = _match_legs(pfe_front_des_15, pfe_front) # -> (N,2) + pfe_front_des30_ = _match_legs(pfe_front_des_30, pfe_front) # -> (N,2) - # Get per-foot GRF - grf = self._compute_grf(grf_norm=True) # (num_envs, 4), already 0–1 + pfe_err15 = (pfe_front - pfe_front_des15_) ** 2 + pfe_err30 = (pfe_front - pfe_front_des30_) ** 2 - # contact = (grf > 0.1).float() - # rewards proportional load sharing - support = torch.mean(grf, dim=1) + # BOTH branches acceptable always: + pfe_term = torch.minimum(pfe_err15, pfe_err30) # (N,2) - return phase.squeeze(1) * support + front_pen = kfe_err + pfe_term # (N,2) - def _reward_controlled_descent(self): - lie_cmd = self.commands[:, 3] - lie_mode = lie_cmd < 1.0 + # --- aggregate per env --- + pen = hind_pen.mean(dim=1) + front_pen.mean(dim=1) # (N,) + return -pen - # Only while still above target height (avoid bounce farming near the ground) - above_target = self.base_height.flatten() > (lie_cmd + 0.05) + def _reset_to_custom_pos(self, env_ids, reset_h): + env_ids = env_ids.to(dtype=torch.long, device=self.device) + B = env_ids.numel() + if B == 0: + return - # "done" after first settle/impact so reward doesn't apply during bounces - grf = self._compute_grf(grf_norm=True) # (N,4) 0..1 - support_all = (grf > 0.1).float().mean(dim=1) # (N,) - # impact_event: 4 feet making contact and height is 12 cm above height - impact_event = (support_all > 0.75) & ( - self.base_height.flatten() < lie_cmd + 0.12 - ) - # Once impact_event happens, lie_down_done stays True - self.lie_down_done |= impact_event + # --- normalize reset_h to (B,) on device --- + if not torch.is_tensor(reset_h): + reset_h = torch.tensor( + reset_h, device=self.device, dtype=torch.float32 + ).repeat(B) + else: + reset_h = reset_h.to(device=self.device, dtype=torch.float32) + if reset_h.numel() == 1: + reset_h = reset_h.repeat(B) + elif reset_h.shape[0] != B: + # if passed full (num_envs,), subset it + if reset_h.shape[0] == self.num_envs: + reset_h = reset_h[env_ids] + else: + raise RuntimeError( + f"reset_h shape {reset_h.shape} incompatible env_id batch {B}" + ) + + # env_ids must be LongTensor on device + env_ids = env_ids.to(device=self.device, dtype=torch.long) + + # Get a per-env base pose, regardless of how default_dof_pos is stored + ddp = self.default_dof_pos + + if ddp.dim() == 1: + # (num_dofs,) -> make (B, num_dofs) + base_pose = ddp.unsqueeze(0).repeat(env_ids.numel(), 1).clone() + elif ddp.dim() == 2 and ddp.shape[0] == 1: + # (1, num_dofs) -> expand to (B, num_dofs) + base_pose = ddp.expand(env_ids.numel(), -1).clone() + elif ddp.dim() == 2 and ddp.shape[0] == self.num_envs: + # (num_envs, num_dofs) -> index by env ids + base_pose = ddp[env_ids].clone() + else: + raise RuntimeError(f"Unexpected default_dof_pos shape: {tuple(ddp.shape)}") + + # Decide stand vs lie by z pos + h_mid = 0.7 + + use_stand = reset_h >= h_mid # (B,) bool + + stand_pose = base_pose.clone() + lie_pose = base_pose.clone() + + # DOF map: 0 base_joint + # RH: 1..5, LH: 6..10, RF: 11..15, LF: 16..20 (haa,hfe,kfe,pfe,pastern) + + pos_range = self.cfg.init_state.dof_pos_range + + for base in [1, 6, 11, 16]: + haa = base + 0 + hfe = base + 1 + kfe = base + 2 + pfe = base + 3 + pas = base + 4 + + # standing (tune) + stand_pose[:, haa] = pos_range["haa"][0] + ( + pos_range["haa"][1] - pos_range["haa"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, hfe] = pos_range["hfe"][0] + ( + pos_range["hfe"][1] - pos_range["hfe"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, kfe] = pos_range["kfe"][0] + ( + pos_range["kfe"][1] - pos_range["kfe"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, pfe] = pos_range["pfe"][0] + ( + pos_range["pfe"][1] - pos_range["pfe"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, pas] = pos_range["pastern_to_foot"][0] + ( + pos_range["pastern_to_foot"][1] - pos_range["pastern_to_foot"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + + # front kneel (tune) + for base in [11, 16]: + haa = base + 0 + hfe = base + 1 + kfe = base + 2 + pfe = base + 3 + pas = base + 4 + lie_pose[:, haa] = 0.0 + lie_pose[:, hfe] = -1.0 + (0.2 - -1.0) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, kfe] = -1.5 + (-0.5 - -1.5) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pfe] = -0.3 + (3.0 - -0.3) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pas] = -0.3 + (1.8 - -0.3) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) - # Reward only applies when: in lie-down command, still above the target, haven’t - # “completed” the lie-down yet. - active = lie_mode & above_target & (~self.lie_down_done) + # hind tuck (tune) + for base in [1, 6]: + haa = base + 0 + hfe = base + 1 + kfe = base + 2 + pfe = base + 3 + pas = base + 4 + lie_pose[:, haa] = 0.0 + lie_pose[:, hfe] = -1.5 + (-1 - -1.5) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, kfe] = -0.2 + (1 - -0.2) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pfe] = 0.0 + (-1.2 - 0.0) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pas] = -0.3 + (1.8 - -0.3) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) - # penalize downward speed and upward bounce separately - vz = self.base_lin_vel[:, 2] - fall_speed = torch.clamp(-vz, min=0.0) # downward only - bounce_speed = torch.clamp(vz, min=0.0) # upward only + new_pose = torch.where(use_stand.unsqueeze(1), stand_pose, lie_pose) - # penalize force above 5000 (hard threshold), squared, averaged across feet - foot_F = torch.norm( - self.contact_forces[:, self.feet_indices, :], dim=-1 - ) # (N,4) - impact_pen = torch.mean(torch.clamp(foot_F - 5000.0, min=0.0) ** 2, dim=1) + # clamp to limits (dof_pos_limits: (num_dof, 2)) + lo = self.dof_pos_limits[:, 0].unsqueeze(0) + hi = self.dof_pos_limits[:, 1].unsqueeze(0) + new_pose = torch.max(torch.min(new_pose, hi), lo) - # Encourage small downward speed during the *first* descent, - # penalize bounce and impact spikes - reward = -(fall_speed**2) - 2.0 * (bounce_speed**2) - 2.0 * impact_pen + # apply to buffers + self.dof_pos[env_ids] = new_pose + self.dof_vel[env_ids] = 0.0 - return active.float() * reward + if ( + hasattr(self, "dof_state") + and self.dof_state.dim() == 3 + and self.dof_state.shape[2] >= 2 + ): + self.dof_state[env_ids, :, 0] = new_pose + self.dof_state[env_ids, :, 1] = 0.0 diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py index ac9777a1..436af043 100644 --- a/gym/envs/horse/horse_osc_config.py +++ b/gym/envs/horse/horse_osc_config.py @@ -67,7 +67,7 @@ class init_state(HorseCfg.init_state): root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [1.35, 1.35], # z + [0.6, 1.35], # z [-0.0, 0.0], # roll [-0.0, 0.0], # pitch [-0.2, 0.2], # yaw @@ -138,7 +138,7 @@ class ranges: lin_vel_x = [-1.0, 0.0, 1.0, 3.0, 6.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] yaw_vel = 6 # max [rad/s] - height = [0.61, 1.30] # m + height = [0.0, 1.30] # m, command range of height during training class push_robots: toggle = False @@ -162,7 +162,7 @@ class asset(HorseCfg.asset): file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" foot_name = "foot" penalize_contacts_on = ["thigh"] # "thigh", "shank", "pastern" - terminate_after_contacts_on = ["thigh"] + terminate_after_contacts_on = ["top"] collapse_fixed_joints = False fix_base_link = False self_collisions = 1 @@ -275,11 +275,8 @@ class weights: enc_pace = 0.0 cursorial = 1.0 # enourage legs to stay under body, don't splay out standing_torques = 0.0 # 1.e-5 - tracking_height = 1.0 + tracking_height = 5.0 tendon_constraints = 1.0 - feet_support_during_descent = 100.0 - controlled_descent = 100 - kneel_front_then_hind = 0 class termination_weight: termination = 15.0 / 100.0 diff --git a/gym/utils/interfaces/KeyboardInterface.py b/gym/utils/interfaces/KeyboardInterface.py index da7a8ac8..ce740f50 100644 --- a/gym/utils/interfaces/KeyboardInterface.py +++ b/gym/utils/interfaces/KeyboardInterface.py @@ -46,7 +46,7 @@ def __init__(self, env): self.min_height = -1000 self.max_height = 1.3 - self.increment_height = 0.7 + self.increment_height = 0.1 def update(self, env): for evt in env.gym.query_viewer_action_events(env.viewer): diff --git a/resources/robots/horse/urdf/horse.urdf b/resources/robots/horse/urdf/horse.urdf index 56e5443f..22d3eb3f 100644 --- a/resources/robots/horse/urdf/horse.urdf +++ b/resources/robots/horse/urdf/horse.urdf @@ -1,5 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -18,7 +47,7 @@ - + diff --git a/scripts/play.py b/scripts/play.py index e589451d..46d3bb46 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -6,6 +6,7 @@ # torch needs to be imported after isaacgym imports in local source import torch import numpy as np +import os BASE_HEIGHT_REF = 1.3 @@ -101,6 +102,46 @@ def play(env, runner, train_cfg): obs_log = create_obs_logging_dict(env, obs_vars, num_steps) + # --- height reward logging --- + height_rew = np.zeros((num_steps,), dtype=np.float32) + height_actual = np.zeros((num_steps,), dtype=np.float32) + height_target = np.zeros((num_steps,), dtype=np.float32) + + # ---------- record init pos/height ---------- + OUT_DIR = "play_logs" + os.makedirs(OUT_DIR, exist_ok=True) + + # num dofs (don't rely on env.num_dofs, use tensor shape) + num_dofs = env.dof_pos.shape[1] + + # Capture *one-time* init snapshot for all envs + # root z: prefer root_states[:,2] if it exists; otherwise fall back to base_height + if hasattr(env, "root_states"): + init_root_z = env.root_states[:, 2].detach().cpu().numpy().astype(np.float32) + else: + init_root_z = env.base_height.detach().cpu().numpy().astype(np.float32) + + # commanded height at start + init_cmd_h = env.commands[:, 3].detach().cpu().numpy().astype(np.float32) + # starting joint positions + init_dof_pos = env.dof_pos.detach().cpu().numpy().astype(np.float32) + + # Build a single 2D table: (num_envs, 2 + num_dofs) + # col0 = init_root_z, col1 = init_cmd_h, col2.. = dof_pos + init_table = np.zeros((env.num_envs, 2 + num_dofs), dtype=np.float32) + init_table[:, 0] = init_root_z + init_table[:, 1] = init_cmd_h + init_table[:, 2:] = init_dof_pos + + # Column names/joint names + if hasattr(env, "dof_names"): + dof_names = list(env.dof_names) + else: + # fallback: generic names + dof_names = [f"dof_{i}" for i in range(num_dofs)] + + col_names = ["init_root_z", "init_cmd_height"] + [f"init_{n}" for n in dof_names] + # track actual number of simulation steps actual_steps = 0 # track and print commanded height changes @@ -117,7 +158,12 @@ def play(env, runner, train_cfg): if COMMANDS_INTERFACE: # interface = GamepadInterface(env) interface = KeyboardInterface(env) - env.commands[:, 3] = BASE_HEIGHT_REF + + # resample height command only + h_min, h_max = env.cfg.commands.ranges.height + env.commands[:, 3] = h_min + (h_max - h_min) * torch.rand( + (env.num_envs,), device=env.device + ) try: for i in range(10 * int(env.max_episode_length)): @@ -159,6 +205,12 @@ def play(env, runner, train_cfg): # log observations log_obs_step(env, obs_log, obs_vars, actual_steps) + # _reward_tracking_height returns (num_envs,) tensor + r = env._reward_tracking_height() # torch tensor + height_rew[actual_steps] = r[0].item() + height_actual[actual_steps] = env.base_height[0].item() + height_target[actual_steps] = env.commands[0, 3].item() + actual_steps += 1 env.check_exit() # user exit or viewer closed @@ -189,6 +241,22 @@ def play(env, runner, train_cfg): np.savez_compressed("obs_logs.npz", **obs_log_cpu) print(f"\nSaved obs log to obs_logs.npz ({actual_steps} steps)") + # ---------- save init data ---------- + init_npz_path = os.path.join(OUT_DIR, "init_snapshot.npz") + np.savez_compressed( + init_npz_path, + init_table=init_table, + col_names=np.array(col_names, dtype=object), + ) + print(f"[saved] init snapshot -> {init_npz_path}") + + # save as csv + init_csv_path = os.path.join(OUT_DIR, "init_snapshot.csv") + with open(init_csv_path, "w") as f: + f.write(",".join(col_names) + "\n") + np.savetxt(f, init_table, delimiter=",", fmt="%.6f") + print(f"[saved] init snapshot csv -> {init_csv_path}") + if __name__ == "__main__": args = get_args() diff --git a/scripts/plot_height_rewards.py b/scripts/plot_height_rewards.py new file mode 100644 index 00000000..967bf178 --- /dev/null +++ b/scripts/plot_height_rewards.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import numpy as np +import matplotlib.pyplot as plt + +LOG_FILE = "height_reward_logs.npz" +OUT_PNG = "height_reward_plot.png" + +d = np.load(LOG_FILE) +step = d["step"] +height_rew = d["height_rew"] +h_actual = d["height_actual"] +h_target = d["height_target"] + +plt.figure(figsize=(12, 5)) + +plt.plot(step, h_actual, label="actual height (m)") +plt.plot(step, h_target, label="target height (m)") +plt.plot(step, height_rew, label="_reward_tracking_height") + +plt.xlabel("step") +plt.ylabel("value") +plt.title("Height Tracking + Reward (Same Axis)") +plt.grid(True, alpha=0.3) +plt.legend() +plt.tight_layout() +plt.savefig(OUT_PNG, dpi=200) + +print(f"Saved plot -> {OUT_PNG}")