Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
68aac8e
add height training
angelayixuanli Sep 6, 2025
90f3271
mini_cheetah with horse mass
angelayixuanli Oct 23, 2025
b4ed45c
scale inertia
angelayixuanli Oct 24, 2025
1582c75
baseline - stable cheetah
angelayixuanli Oct 29, 2025
afb939c
baseline - split body
angelayixuanli Oct 31, 2025
da4739a
Merge branch 'main' of github.com:sheim/QGym into yl/horseRL
angelayixuanli Nov 2, 2025
fb34dd5
mini_cheetah with horse values
angelayixuanli Nov 5, 2025
f9fa9d3
urdf-debugger fixed
angelayixuanli Nov 5, 2025
af0534b
stable horse but can't stand
angelayixuanli Nov 7, 2025
a880139
realistic effort values
angelayixuanli Nov 8, 2025
c456a0c
better results
angelayixuanli Nov 10, 2025
5001e40
manual merge of non-logging part of yl/horse_RL
sheim Nov 11, 2025
03538e5
Merge branch 'main' into horse_tweaks
sheim Nov 11, 2025
12dc7a4
good SWE cleanup
angelayixuanli Nov 12, 2025
e1b3733
WIP: partial urdf changes
sheim Nov 12, 2025
9aae283
reformat mini_cheetah urdf file for readability
sheim Nov 12, 2025
f155bb4
fix left legs
sheim Nov 12, 2025
318d55e
mirror left and right
sheim Nov 12, 2025
b247418
remove inertial offsets along y for symmetry
sheim Nov 12, 2025
5285384
rename urdf for symmetry ease, some minor tweaks
sheim Nov 12, 2025
ed90880
Merge branch 'yl/horseRL' into horse_tweaks
angelayixuanli Nov 13, 2025
aa7e536
merge horse tweaks
angelayixuanli Nov 13, 2025
71985f6
plots separated by leg or joint
angelayixuanli Nov 13, 2025
795cec0
log obs scaling
angelayixuanli Nov 14, 2025
5d65cc9
update scaling
sheim Nov 17, 2025
4414757
split off horse on its own
sheim Nov 17, 2025
f674162
fix play script
sheim Nov 17, 2025
21a6913
Merge pull request #33 from sheim/horse_tweaks
angelayixuanli Nov 18, 2025
c7eb06f
add horse_osc
angelayixuanli Nov 18, 2025
dc21012
error stats logging
angelayixuanli Nov 19, 2025
2d37c69
wandb sweep config
angelayixuanli Nov 19, 2025
c0411ff
cleanup
angelayixuanli Nov 19, 2025
ca10c7e
more cleanup
angelayixuanli Nov 21, 2025
b9401ac
whoops still need randomize_osc_params
angelayixuanli Nov 21, 2025
80f05d0
horse_tweaks branch + tuning
angelayixuanli Dec 8, 2025
91f2e7c
expand joint limits for lay down motion
angelayixuanli Dec 11, 2025
1f547c4
logs_by_joint new limits
angelayixuanli Dec 11, 2025
935b4d0
add tendon constraints
angelayixuanli Jan 30, 2026
70cfb20
attempt at smoothing descent rewards
angelayixuanli Feb 6, 2026
1957e3f
plots and fix height pos and command
angelayixuanli Feb 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ gym/wandb
*.npz
user/wandb_config.json
*trajectories/
*.png
scaling_analysis/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
12 changes: 12 additions & 0 deletions gym/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"MiniCheetah": ".mini_cheetah.mini_cheetah",
"MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref",
"MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc",
"Horse": ".horse.horse",
"HorseOsc": ".horse.horse_osc",
"MIT_Humanoid": ".mit_humanoid.mit_humanoid",
"Anymal": ".anymal_c.anymal",
"A1": ".a1.a1",
Expand All @@ -28,6 +30,8 @@
"MiniCheetahRefCfg": ".mini_cheetah.mini_cheetah_ref_config",
"MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config",
"MiniCheetahSACCfg": ".mini_cheetah.mini_cheetah_SAC_config",
"HorseCfg": ".horse.horse_config",
"HorseOscCfg": ".horse.horse_osc_config",
"MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config",
"A1Cfg": ".a1.a1_config",
"AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config",
Expand All @@ -42,6 +46,8 @@
"MiniCheetahRefRunnerCfg": ".mini_cheetah.mini_cheetah_ref_config",
"MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config",
"MiniCheetahSACRunnerCfg": ".mini_cheetah.mini_cheetah_SAC_config",
"HorseRunnerCfg": ".horse.horse_config",
"HorseOscRunnerCfg": ".horse.horse_osc_config",
"MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config",
"A1RunnerCfg": ".a1.a1_config",
"AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config",
Expand All @@ -68,6 +74,12 @@
"MiniCheetahSACCfg",
"MiniCheetahSACRunnerCfg"
],
"horse": ["Horse", "HorseCfg", "HorseRunnerCfg"],
"horse_osc": [
"HorseOsc",
"HorseOscCfg",
"HorseOscRunnerCfg",
],
"humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"],
"humanoid_running": [
"HumanoidRunning",
Expand Down
28 changes: 27 additions & 1 deletion gym/envs/base/legged_robot.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,8 +517,9 @@ def _init_buffers(self):
dtype=torch.float,
device=self.device,
)
# add height as the 4th command
self.commands = torch.zeros(
self.num_envs, 3, dtype=torch.float, device=self.device
self.num_envs, 4, dtype=torch.float, device=self.device
)
self.base_lin_vel = quat_rotate_inverse(
self.base_quat, self.root_states[:, 7:10]
Expand Down Expand Up @@ -986,6 +987,31 @@ def _sqrdexp(self, x, scale=1.0):
-torch.square(x / scale) / self.cfg.reward_settings.tracking_sigma
)

def _process_rigid_body_props(self, props, env_id):
if env_id == 0:
# * init buffers for the domain rand changes
self.mass = torch.zeros(self.num_envs, 1, device=self.device)
self.com = torch.zeros(self.num_envs, 3, device=self.device)

# * randomize mass
if self.cfg.domain_rand.randomize_base_mass:
lower = self.cfg.domain_rand.lower_mass_offset
upper = self.cfg.domain_rand.upper_mass_offset
# self.mass_
props[0].mass += np.random.uniform(lower, upper)
self.mass[env_id] = props[0].mass
# * randomize com position
lower = self.cfg.domain_rand.lower_z_offset
upper = self.cfg.domain_rand.upper_z_offset
props[0].com.z += np.random.uniform(lower, upper)
self.com[env_id, 2] = props[0].com.z

lower = self.cfg.domain_rand.lower_x_offset
upper = self.cfg.domain_rand.upper_x_offset
props[0].com.x += np.random.uniform(lower, upper)
self.com[env_id, 0] = props[0].com.x
return props

# ------------ reward functions----------------

def _reward_lin_vel_z(self):
Expand Down
78 changes: 78 additions & 0 deletions gym/envs/horse/horse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import torch

from isaacgym.torch_utils import torch_rand_float
from gym.envs.base.legged_robot import LeggedRobot


class Horse(LeggedRobot):
    """Legged-robot environment for the horse model.

    Extends ``LeggedRobot`` with a fourth command channel (base height)
    and a family of squared-exponential shaping rewards.
    """

    def __init__(self, gym, sim, cfg, sim_params, sim_device, headless):
        super().__init__(gym, sim, cfg, sim_params, sim_device, headless)

    def _reward_lin_vel_z(self):
        """Reward small vertical base velocity (squared exponential)."""
        vz_scaled = self.base_lin_vel[:, 2] / self.scales["base_lin_vel"]
        return self._sqrdexp(vz_scaled)

    def _reward_ang_vel_xy(self):
        """Reward small roll/pitch base angular velocity."""
        scaled = self.base_ang_vel[:, :2] / self.scales["base_ang_vel"]
        return self._sqrdexp(scaled).sum(dim=1)

    def _reward_orientation(self):
        """Reward a flat base (projected gravity has no xy component)."""
        tilt = torch.square(self.projected_gravity[:, :2])
        tilt = tilt / self.cfg.reward_settings.tracking_sigma
        return torch.exp(-tilt).sum(dim=1)

    def _reward_min_base_height(self):
        """Saturating height reward: full once base reaches the target.

        Heights above the target are clamped to zero error, so only a
        shortfall below ``base_height_target`` is penalized.
        """
        shortfall = self.base_height - self.cfg.reward_settings.base_height_target
        shortfall = shortfall / self.scales["base_height"]
        shortfall = torch.clamp(shortfall, max=0, min=None).flatten()
        return self._sqrdexp(shortfall)

    def _reward_tracking_lin_vel(self):
        """Track the commanded xy linear velocity."""
        vel_err = self.commands[:, :2] - self.base_lin_vel[:, :2]
        # * scale by (1+|cmd|): if cmd=0, no scaling.
        vel_err = vel_err * (1.0 / (1.0 + torch.abs(self.commands[:, :2])))
        sq_err = torch.sum(torch.square(vel_err), dim=1)
        return torch.exp(-sq_err / self.cfg.reward_settings.tracking_sigma)

    def _reward_tracking_ang_vel(self):
        """Track the commanded yaw rate (command channel 2).

        NOTE(review): the error is squared here and squared again inside
        ``_sqrdexp`` — confirm the double squaring is intended.
        """
        yaw_err = (self.commands[:, 2] - self.base_ang_vel[:, 2]) / 5.0
        return self._sqrdexp(torch.square(yaw_err))

    def _reward_dof_vel(self):
        """Reward small joint velocities, summed over joints."""
        per_joint = self._sqrdexp(self.dof_vel / self.scales["dof_vel"])
        return per_joint.sum(dim=1)

    def _reward_dof_near_home(self):
        """Reward joint positions close to the default pose."""
        deviation = (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos_obs"]
        return self._sqrdexp(deviation).sum(dim=1)

    def _resample_commands(self, env_ids):
        """Resample base commands, then draw a new height command (index 3)."""
        super()._resample_commands(env_ids)

        low, high = self.command_ranges["height"]
        drawn = torch_rand_float(low, high, (len(env_ids), 1), device=self.device)
        self.commands[env_ids, 3] = drawn.squeeze(1)

    def _reward_tracking_height(self):
        """Track the commanded base height (command channel 3)."""
        height_err = self.base_height.flatten() - self.commands[:, 3].flatten()
        height_err = height_err / self.scales["base_height"]
        return self._sqrdexp(height_err)
Loading