diff --git a/.gitignore b/.gitignore index c78e6696..2a17069a 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,8 @@ gym/wandb *.npz user/wandb_config.json *trajectories/ +*.png +scaling_analysis/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index f9d8b788..25b20da1 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -15,6 +15,8 @@ "MiniCheetah": ".mini_cheetah.mini_cheetah", "MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref", "MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc", + "Horse": ".horse.horse", + "HorseOsc": ".horse.horse_osc", "MIT_Humanoid": ".mit_humanoid.mit_humanoid", "Anymal": ".anymal_c.anymal", "A1": ".a1.a1", @@ -28,6 +30,8 @@ "MiniCheetahRefCfg": ".mini_cheetah.mini_cheetah_ref_config", "MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACCfg": ".mini_cheetah.mini_cheetah_SAC_config", + "HorseCfg": ".horse.horse_config", + "HorseOscCfg": ".horse.horse_osc_config", "MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config", "A1Cfg": ".a1.a1_config", "AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -42,6 +46,8 @@ "MiniCheetahRefRunnerCfg": ".mini_cheetah.mini_cheetah_ref_config", "MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config", "MiniCheetahSACRunnerCfg": ".mini_cheetah.mini_cheetah_SAC_config", + "HorseRunnerCfg": ".horse.horse_config", + "HorseOscRunnerCfg": ".horse.horse_osc_config", "MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config", "A1RunnerCfg": ".a1.a1_config", "AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config", @@ -68,6 +74,12 @@ "MiniCheetahSACCfg", "MiniCheetahSACRunnerCfg" ], + "horse": ["Horse", "HorseCfg", "HorseRunnerCfg"], + "horse_osc": [ + "HorseOsc", + "HorseOscCfg", + "HorseOscRunnerCfg", + ], "humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"], "humanoid_running": [ "HumanoidRunning", diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index 404c62e4..2344e05f 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -517,8 +517,9 @@ def _init_buffers(self): dtype=torch.float, device=self.device, ) + # add height as the 4th command self.commands = torch.zeros( - self.num_envs, 3, dtype=torch.float, device=self.device + self.num_envs, 4, dtype=torch.float, device=self.device ) self.base_lin_vel = quat_rotate_inverse( self.base_quat, self.root_states[:, 7:10] @@ -986,6 +987,31 @@ def _sqrdexp(self, x, scale=1.0): -torch.square(x / scale) / self.cfg.reward_settings.tracking_sigma ) + def _process_rigid_body_props(self, props, env_id): + if env_id == 0: + # * init buffers for the domain rand changes + self.mass = torch.zeros(self.num_envs, 1, device=self.device) + self.com = torch.zeros(self.num_envs, 3, device=self.device) + + # * randomize mass + if self.cfg.domain_rand.randomize_base_mass: + lower = self.cfg.domain_rand.lower_mass_offset + upper = self.cfg.domain_rand.upper_mass_offset + # self.mass_ + props[0].mass += np.random.uniform(lower, upper) + self.mass[env_id] = props[0].mass + # * randomize com position + lower = self.cfg.domain_rand.lower_z_offset + upper = self.cfg.domain_rand.upper_z_offset + props[0].com.z += np.random.uniform(lower, upper) + self.com[env_id, 2] = props[0].com.z + + lower = self.cfg.domain_rand.lower_x_offset + upper = self.cfg.domain_rand.upper_x_offset + props[0].com.x += np.random.uniform(lower, upper) + self.com[env_id, 0] = props[0].com.x + return props + # ------------ reward functions---------------- 
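Note on the self.commands change above: throughout this diff the four command columns are [lin_vel_x, lin_vel_y, yaw_vel, height]. Columns 0-1 feed _reward_tracking_lin_vel, column 2 feeds _reward_tracking_ang_vel, and the new column 3 carries the commanded base height used by _reward_tracking_height and by the Up/Down keys in KeyboardInterface. A minimal standalone sketch of that layout (illustrative only; these tensor names are not part of the codebase):

import torch

num_envs = 4
# command layout assumed by this diff: [lin_vel_x, lin_vel_y, yaw_vel, height]
# (matches scaling.commands = [3, 1, 3, 1] in the horse configs)
commands = torch.zeros(num_envs, 4)
commands[:, 3] = 1.3  # commanded base height [m], sampled from cfg.commands.ranges.height

base_height = torch.full((num_envs,), 1.25)  # measured base height per env
height_error = base_height - commands[:, 3]  # normalized by scales["base_height"] and squashed with _sqrdexp in _reward_tracking_height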
def _reward_lin_vel_z(self): diff --git a/gym/envs/horse/horse.py b/gym/envs/horse/horse.py new file mode 100644 index 00000000..bb2e545e --- /dev/null +++ b/gym/envs/horse/horse.py @@ -0,0 +1,78 @@ +import torch + +from isaacgym.torch_utils import torch_rand_float +from gym.envs.base.legged_robot import LeggedRobot + + +class Horse(LeggedRobot): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _reward_lin_vel_z(self): + """Penalize z axis base linear velocity with squared exp""" + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"]) + + def _reward_ang_vel_xy(self): + """Penalize xy axes base angular velocity""" + error = self._sqrdexp(self.base_ang_vel[:, :2] / self.scales["base_ang_vel"]) + return torch.sum(error, dim=1) + + def _reward_orientation(self): + """Penalize non-flat base orientation""" + error = ( + torch.square(self.projected_gravity[:, :2]) + / self.cfg.reward_settings.tracking_sigma + ) + return torch.sum(torch.exp(-error), dim=1) + + def _reward_min_base_height(self): + """Squared exponential saturating at base_height target""" + error = self.base_height - self.cfg.reward_settings.base_height_target + error /= self.scales["base_height"] + error = torch.clamp(error, max=0, min=None).flatten() + return self._sqrdexp(error) + + def _reward_tracking_lin_vel(self): + """Tracking of linear velocity commands (xy axes)""" + # just use lin_vel? + error = self.commands[:, :2] - self.base_lin_vel[:, :2] + # * scale by (1+|cmd|): if cmd=0, no scaling. + error *= 1.0 / (1.0 + torch.abs(self.commands[:, :2])) + error = torch.sum(torch.square(error), dim=1) + return torch.exp(-error / self.cfg.reward_settings.tracking_sigma) + + def _reward_tracking_ang_vel(self): + """Tracking of angular velocity commands (yaw)""" + ang_vel_error = torch.square( + (self.commands[:, 2] - self.base_ang_vel[:, 2]) / 5.0 + ) + return self._sqrdexp(ang_vel_error) + + def _reward_dof_vel(self): + """Penalize dof velocities""" + return torch.sum(self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1) + + def _reward_dof_near_home(self): + return torch.sum( + self._sqrdexp( + (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos_obs"] + ), + dim=1, + ) + + def _resample_commands(self, env_ids): + super()._resample_commands(env_ids) + + # resample height + height_range = self.command_ranges["height"] + self.commands[env_ids, 3] = torch_rand_float( + height_range[0], height_range[1], (len(env_ids), 1), device=self.device + ).squeeze(1) + + def _reward_tracking_height(self): + """Reward for base height.""" + # error between current and commanded height + error = self.base_height.flatten() - self.commands[:, 3].flatten() + error /= self.scales["base_height"] + + return self._sqrdexp(error) diff --git a/gym/envs/horse/horse_config.py b/gym/envs/horse/horse_config.py new file mode 100644 index 00000000..699aff5f --- /dev/null +++ b/gym/envs/horse/horse_config.py @@ -0,0 +1,262 @@ +from gym.envs.base.legged_robot_config import ( + LeggedRobotCfg, + LeggedRobotRunnerCfg, +) + +BASE_HEIGHT_REF = 1.3 + + +class HorseCfg(LeggedRobotCfg): + class env(LeggedRobotCfg.env): + num_envs = 2**12 + num_actuators = 1 + 4 * 5 + episode_length_s = 10 + + class terrain(LeggedRobotCfg.terrain): + mesh_type = "plane" + + class init_state(LeggedRobotCfg.init_state): + default_joint_angles = { + "haa": 0.0, + "hfe": 0.0, + "kfe": 0.0, + "pfe": 0.0, + "pastern_to_foot": 0.0, + "base_joint": 0.0, + } + + # 
* reset setup chooses how the initial conditions are chosen. + # * "reset_to_basic" = a single position + # * "reset_to_range" = uniformly random from a range defined below + reset_mode = "reset_to_range" + + # * default COM for basic initialization + pos = [0.0, 0.0, 1.4] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + + # * initialization for random range setup + # these are the physical limits in the URDF as of 17 Nov 2025 + # dof_pos_range = { + # "haa": [-0.2, 0.2], + # "hfe": [-0.7, 0.6], + # "kfe": [-1.3, 0.1], + # "pfe": [-0.3, 2.2], + # "pastern_to_foot": [-0.3, 1.8], + # "base_joint": [-0.2, 0.2], + # } + dof_pos_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.7, 0.6], + "kfe": [-1.3, 0.1], + "pfe": [-0.3, 2.2], + "pastern_to_foot": [-0.3, 1.8], + "base_joint": [-0.2, 0.2], + } + dof_vel_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.2, 0.2], + "kfe": [-0.2, 0.2], + "pfe": [-0.2, 0.2], + "pastern_to_foot": [-0.2, 0.2], + "base_joint": [-0.2, 0.2], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [1.3, 1.3], # z + [-0.2, 0.2], # roll + [-0.2, 0.2], # pitch + [-0.2, 0.2], # yaw + ] + root_vel_range = [ + [-0.5, 5.0], # x + [0.0, 0.0], # y + [-0.05, 0.05], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(LeggedRobotCfg.control): + # * PD Drive parameters: + stiffness = { + "haa": 4000, + "hfe": 4000, + "kfe": 4000, + "pfe": 4000, + "pastern_to_foot": 4000, + "base_joint": 50, + } + damping = { + "haa": 250, + "hfe": 250, + "kfe": 250, + "pfe": 250, + "pastern_to_foot": 250, + "base_joint": 10, + } + ctrl_frequency = 250 # how often the PDF controller/action updates run + desired_sim_frequency = 500 # how often the physics is calculated + + class commands: + # * time before command are changed[s] + resampling_time = 3.0 + + class ranges: + lin_vel_x = [-1.0, 8.0] # min max [m/s] + lin_vel_y = 1.0 # max [m/s] + yaw_vel = 6 # max [rad/s] + height = [0.61, 1.30] # m + + class push_robots: + toggle = False + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.5, 5.0] + randomize_base_mass = False + added_mass_range = [-1.0, 1.0] + + class asset(LeggedRobotCfg.asset): + file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" + foot_name = "foot" + penalize_contacts_on = ["shank"] + terminate_after_contacts_on = ["base"] + end_effector_names = ["foot"] + collapse_fixed_joints = False + self_collisions = 1 + flip_visual_attachments = False + disable_gravity = False + disable_motors = False + joint_damping = 0.3 + fix_base_link = False + rotor_inertia = [0.002268] + 4 * ( + [0.002268, 0.002268, 0.005484, 0.005484, 0.005484] + ) + + class reward_settings(LeggedRobotCfg.reward_settings): + soft_dof_pos_limit = 0.9 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 600.0 + base_height_target = BASE_HEIGHT_REF + tracking_sigma = 0.25 + + class scaling(LeggedRobotCfg.scaling): + base_ang_vel = 0.3 + base_lin_vel = BASE_HEIGHT_REF + dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + (4 * [0.4, 1.3, 1.4, 2.5, 2.1]) + dof_pos_obs = dof_pos + dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used + commands = [3, 1, 3, 1] # add height as a command + + +class 
HorseRunnerCfg(LeggedRobotRunnerCfg): + seed = -1 + runner_class_name = "OnPolicyRunner" + + class actor(LeggedRobotRunnerCfg.actor): + hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + normalize_obs = True + actions = ["dof_pos_target"] + add_noise = True + disable_actions = False + + class noise: + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 + dof_vel = 0.005 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 + + class critic(LeggedRobotRunnerCfg.critic): + hidden_dims = [128, 64] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + normalize_obs = True + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.01 + orientation = 1.0 + torques = 5.0e-7 + dof_vel = 0.0 + min_base_height = 1.5 + action_rate = 1e-4 + action_rate2 = 1e-5 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + tracking_height = 1.5 + + class termination_weight: + termination = 0.01 + + class algorithm(LeggedRobotRunnerCfg.algorithm): + class algorithm: + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-4, 1e-2] + lr_ratio = 1.3 + + class runner(LeggedRobotRunnerCfg.runner): + run_name = "" + experiment_name = "horse" + max_iterations = 1000 + algorithm_class_name = "PPO2" + num_steps_per_env = 32 diff --git a/gym/envs/horse/horse_osc.py b/gym/envs/horse/horse_osc.py new file mode 100644 index 00000000..5c0ef2c1 --- /dev/null +++ b/gym/envs/horse/horse_osc.py @@ -0,0 +1,725 @@ +import torch +from isaacgym.torch_utils import torch_rand_float + +from gym.envs.horse.horse import Horse + +from isaacgym import gymtorch + +HORSE_WEIGHT = 536.38 * 9.81 # Weight of horse in Newtons +BASE_HEIGHT_REF = 1.3 + + +class HorseOsc(Horse): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _init_buffers(self): + super()._init_buffers() + + BASE = 0 + + RH = dict(haa=1, hfe=2, kfe=3, pfe=4, pastern=5) + LH = dict(haa=6, hfe=7, kfe=8, pfe=9, pastern=10) + RF = dict(haa=11, hfe=12, kfe=13, pfe=14, pastern=15) + LF = dict(haa=16, hfe=17, kfe=18, pfe=19, pastern=20) + + # tensors for vectorized ops + self.idx = { + "base": torch.tensor([BASE], device=self.device), + # all legs by joint type (RH, LH, RF, LF order) + "haa": torch.tensor( + [RH["haa"], LH["haa"], RF["haa"], LF["haa"]], device=self.device + ), + "hfe": torch.tensor( + [RH["hfe"], LH["hfe"], RF["hfe"], LF["hfe"]], device=self.device + ), + "kfe": torch.tensor( + [RH["kfe"], LH["kfe"], RF["kfe"], LF["kfe"]], device=self.device + ), + "pfe": torch.tensor( + [RH["pfe"], LH["pfe"], RF["pfe"], LF["pfe"]], device=self.device + ), + "pastern": torch.tensor( + [RH["pastern"], LH["pastern"], RF["pastern"], 
LF["pastern"]], + device=self.device, + ), + # hind vs front splits (HIND = RH, LH / FRONT = RF, LF) + "hind_haa": torch.tensor([RH["haa"], LH["haa"]], device=self.device), + "hind_hfe": torch.tensor([RH["hfe"], LH["hfe"]], device=self.device), + "hind_kfe": torch.tensor([RH["kfe"], LH["kfe"]], device=self.device), + "hind_pfe": torch.tensor([RH["pfe"], LH["pfe"]], device=self.device), + "hind_pastern": torch.tensor( + [RH["pastern"], LH["pastern"]], device=self.device + ), + "front_haa": torch.tensor([RF["haa"], LF["haa"]], device=self.device), + "front_hfe": torch.tensor([RF["hfe"], LF["hfe"]], device=self.device), + "front_kfe": torch.tensor([RF["kfe"], LF["kfe"]], device=self.device), + "front_pfe": torch.tensor([RF["pfe"], LF["pfe"]], device=self.device), + "front_pastern": torch.tensor( + [RF["pastern"], LF["pastern"]], device=self.device + ), + # legs (all joints within each leg) + "rh_leg": torch.tensor( + [RH["haa"], RH["hfe"], RH["kfe"], RH["pfe"], RH["pastern"]], + device=self.device, + ), + "lh_leg": torch.tensor( + [LH["haa"], LH["hfe"], LH["kfe"], LH["pfe"], LH["pastern"]], + device=self.device, + ), + "rf_leg": torch.tensor( + [RF["haa"], RF["hfe"], RF["kfe"], RF["pfe"], RF["pastern"]], + device=self.device, + ), + "lf_leg": torch.tensor( + [LF["haa"], LF["hfe"], LF["kfe"], LF["pfe"], LF["pastern"]], + device=self.device, + ), + # group by front or hind legs + "hind_legs": torch.tensor( + [ + RH["haa"], + RH["hfe"], + RH["kfe"], + RH["pfe"], + RH["pastern"], + LH["haa"], + LH["hfe"], + LH["kfe"], + LH["pfe"], + LH["pastern"], + ], + device=self.device, + ), + "front_legs": torch.tensor( + [ + RF["haa"], + RF["hfe"], + RF["kfe"], + RF["pfe"], + RF["pastern"], + LF["haa"], + LF["hfe"], + LF["kfe"], + LF["pfe"], + LF["pastern"], + ], + device=self.device, + ), + } + + self.prev_height_error = torch.zeros(self.num_envs, device=self.device) + self.lie_down_done = torch.zeros( + self.num_envs, dtype=torch.bool, device=self.device + ) + + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) + self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) + + self.oscillators_vel = torch.zeros_like(self.oscillators) + self.grf = torch.zeros(self.num_envs, 4, device=self.device) + self.osc_omega = self.cfg.osc.omega * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_coupling = self.cfg.osc.coupling * torch.ones( + self.num_envs, 1, device=self.device + ) + self.osc_offset = self.cfg.osc.offset * torch.ones( + self.num_envs, 1, device=self.device + ) + + env_ids = torch.arange(self.num_envs, device=self.device, dtype=torch.long) + + self._reset_to_custom_pos(env_ids, self.root_states[env_ids, 2].clone()) + + # resample height command + h_min, h_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = h_min + (h_max - h_min) * torch.rand( + (len(env_ids),), device=self.device + ) + + def _reset_oscillators(self, env_ids): + if len(env_ids) == 0: + return + # * random + if self.cfg.osc.init_to == "random": + self.oscillators[env_ids] = torch_rand_float( + 0, + 2 * torch.pi, + shape=self.oscillators[env_ids].shape, + device=self.device, + ) + elif self.cfg.osc.init_to == "standing": + self.oscillators[env_ids] = 3 * torch.pi / 2 + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + + def _reset_system(self, env_ids): + if len(env_ids) == 0: + return + self._reset_oscillators(env_ids) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + # * keep some robots in 
the same starting state as they ended + timed_out_subset = (self.timed_out & ~self.terminated) * ( + torch.rand(self.num_envs, device=self.device) + < self.cfg.init_state.timeout_reset_ratio + ) + + env_ids = (self.terminated | timed_out_subset).nonzero().flatten() + if len(env_ids) == 0: + return + super()._reset_system(env_ids) + + # pose horse based set height + self._reset_to_custom_pos(env_ids, self.root_states[env_ids, 2].clone()) + + # resample height command + h_min, h_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = h_min + (h_max - h_min) * torch.rand( + (len(env_ids),), device=self.device + ) + + if len(env_ids) > 0: + h_cmd = self.commands[env_ids, 3].flatten() + h_now = self.base_height[env_ids].flatten() + self.prev_height_error[env_ids] = torch.abs(h_now - h_cmd) + self.lie_down_done[env_ids] = False + + def _pre_decimation_step(self): + super()._pre_decimation_step() + # self.grf = self._compute_grf() + self.compute_osc_slope() + + def compute_osc_slope(self): + cmd_x = torch.abs(self.commands[:, 0:1]) - self.cfg.osc.stop_threshold + stop = cmd_x < 0 + + self.osc_offset = stop * self.cfg.osc.offset + self.osc_omega = stop * self.cfg.osc.omega_stop + self.osc_coupling = stop * self.cfg.osc.coupling_stop + + self.osc_omega += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.omega_slope + self.cfg.osc.omega_step, + min=0.0, + max=self.cfg.osc.omega_max, + ) + self.osc_coupling += (~stop) * torch.clamp( + cmd_x * self.cfg.osc.coupling_slope + self.cfg.osc.coupling_step, + min=0.0, + max=self.cfg.osc.coupling_max, + ) + + self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) + self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) + + def _post_decimation_step(self): + """Update all states that are not handled in PhysX""" + super()._post_decimation_step() + self.grf = self._compute_grf() + # self._step_oscillators() + + def _post_physx_step(self): + super()._post_physx_step() + self._step_oscillators(self.dt / self.cfg.control.decimation) + return None + + def _step_oscillators(self, dt=None): + if dt is None: + dt = self.dt + + local_feedback = self.osc_coupling * ( + torch.cos(self.oscillators) + self.osc_offset + ) + grf = self._compute_grf() + self.oscillators_vel = self.osc_omega - grf * local_feedback + self.oscillators_vel += ( + torch.randn(self.oscillators_vel.shape, device=self.device) + * self.cfg.osc.process_noise_std + ) + + self.oscillators_vel *= 2 * torch.pi + self.oscillators += self.oscillators_vel * dt + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + def _resample_commands(self, env_ids): + """Randommly select commands of some environments + + Args: + env_ids (List[int]): Environments ids for which new commands are needed + """ + if len(env_ids) == 0: + return + super()._resample_commands(env_ids) + possible_commands = torch.tensor( + self.command_ranges["lin_vel_x"], device=self.device + ) + self.commands[env_ids, 0:1] = possible_commands[ + torch.randint( + 0, len(possible_commands), (len(env_ids), 1), device=self.device + ) + ] + # add some gaussian noise to the commands + self.commands[env_ids, 0:1] += ( + torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var + ) + + if 0 in self.cfg.commands.ranges.lin_vel_x: + # * with 20% chance, reset to 0 commands except for forward + self.commands[env_ids, 1:] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + 
).unsqueeze(1) + # * with 20% chance, reset to 0 commands except for rotation + self.commands[env_ids, :2] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.8 + ).unsqueeze(1) + # * with 10% chance, reset to 0 + self.commands[env_ids, :] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.9 + ).unsqueeze(1) + + if self.cfg.osc.randomize_osc_params: + self._resample_osc_params(env_ids) + + # resample height command + h_min, h_max = self.cfg.commands.ranges.height + self.commands[env_ids, 3] = h_min + (h_max - h_min) * torch.rand( + (len(env_ids),), device=self.device + ) + + def _resample_osc_params(self, env_ids): + if len(env_ids) > 0: + self.osc_omega[env_ids, 0] = torch_rand_float( + self.cfg.osc.omega_range[0], + self.cfg.osc.omega_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_coupling[env_ids, 0] = torch_rand_float( + self.cfg.osc.coupling_range[0], + self.cfg.osc.coupling_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + self.osc_offset[env_ids, 0] = torch_rand_float( + self.cfg.osc.offset_range[0], + self.cfg.osc.offset_range[1], + (len(env_ids), 1), + device=self.device, + ).squeeze(1) + + def perturb_base_velocity(self, velocity_delta, env_ids=None): + if env_ids is None: + env_ids = [range(self.num_envs)] + self.root_states[env_ids, 7:10] += velocity_delta + self.gym.set_actor_root_state_tensor( + self.sim, gymtorch.unwrap_tensor(self.root_states) + ) + + def _compute_grf(self, grf_norm=True): + grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) + if grf_norm: + return torch.clamp_max(grf / HORSE_WEIGHT, 1.0) + else: + return grf + + def _switch(self): + c_vel = torch.linalg.norm(self.commands, dim=1) + return torch.exp( + -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) + / self.cfg.reward_settings.switch_scale + ) + + def _reward_lin_vel_z(self): + """Penalize z axis base linear velocity with squared exp""" + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"][0]) + + def _reward_ang_vel_xy(self): + """Penalize xy axes base angular velocity""" + error = self._sqrdexp(self.base_ang_vel[:, :2] / self.scales["base_ang_vel"][0]) + return torch.sum(error, dim=1) + + # modified this method so joint index list uses horse 5 DOF legs, not 3 DOF + def _reward_cursorial(self): + """ + Encourage legs under body, joints stay neutral, avoid inverted/folded poses + Penalize key joints drifting from home. For a horse with 5 joints per leg, + we penalize (haa, hfe, kfe) for each of the 4 legs. 
+ Distal joint (pfe, pastern_to_foot) naturally flex a lot, shock absorbers + """ + legs = torch.tensor(list(range(1, 21)), device=self.device) + return -torch.mean(torch.square(self.dof_pos[:, legs]), dim=1) + + def _reward_swing_grf(self): + # Reward non-zero grf during swing (0 to pi) + rew = self.get_swing_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool) + return -torch.sum(rew, dim=1) + + def _reward_stance_grf(self): + # Reward non-zero grf during stance (pi to 2pi) + rew = self.get_stance_grf(self.cfg.osc.osc_bool, self.cfg.osc.grf_bool) + return torch.sum(rew, dim=1) + + def get_swing_grf(self, osc_bool=False, contact_bool=False): + if osc_bool: + phase = torch.lt(self.oscillators, torch.pi).int() + else: + phase = torch.maximum( + torch.zeros_like(self.oscillators), torch.sin(self.oscillators) + ) + if contact_bool: + return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold) + else: + return phase * self._compute_grf() + + def get_stance_grf(self, osc_bool=False, contact_bool=False): + if osc_bool: + phase = torch.gt(self.oscillators, torch.pi).int() + else: + phase = torch.maximum( + torch.zeros_like(self.oscillators), -torch.sin(self.oscillators) + ) + if contact_bool: + return phase * torch.gt(self._compute_grf(), self.cfg.osc.grf_threshold) + else: + return phase * self._compute_grf() + + def _reward_coupled_grf(self): + """ + Multiply rewards for stance/swing grf, discount when undesirable + behavior (grf during swing, no grf during stance) + """ + swing_rew = self.get_swing_grf() + stance_rew = self.get_stance_grf() + combined_rew = self._sqrdexp(swing_rew * 2) + stance_rew + prod = torch.prod(torch.clip(combined_rew, 0, 1), dim=1) + return prod - torch.ones_like(prod) + + def _reward_dof_vel(self): + """Penalize dof velocities""" + return super()._reward_dof_vel() * self._switch() + + def _reward_dof_near_home(self): + return super()._reward_dof_near_home() * self._switch() + + def _reward_stand_still(self): + """Penalize any movement when at zero commands.""" + # normalize angles so we care about being within ~5 degrees + rew_pos = torch.mean( + self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1 + ) + rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) + rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) + rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) + return (rew_vel + rew_pos - rew_base_vel) * self._switch() + + def _reward_standing_torques(self): + """Penalize torques at zero commands""" + return super()._reward_torques() * self._switch() + + # * gait similarity scores + def angle_difference(self, theta1, theta2): + diff = torch.abs(theta1 - theta2) % (2 * torch.pi) + return torch.min(diff, 2 * torch.pi - diff) + + def _reward_tracking_height(self): + """Reward for base height.""" + # error between current and commanded height + error = self.base_height.flatten() - self.commands[:, 3].flatten() + error /= self.scales["base_height"] + + return self._sqrdexp(error) + + def _reward_tendon_constraints(self): + """ + Tendon-like coupling constraints (front/hind split): + - Desired KFE/PFE become dependant on HFE. + - Penalize deviation from those desired values. 
+ """ + + # --- helpers --- + def lerp(x, x0, x1, y0, y1): + """Linear interpolation y(x) between (x0,y0)->(x1,y1), with clamping.""" + # handle degenerate interval + denom = x1 - x0 + denom = torch.where(torch.abs(denom) < 1e-8, torch.ones_like(denom), denom) + t = (x - x0) / denom + t = torch.clamp(t, 0.0, 1.0) + return y0 + t * (y1 - y0) + + def piecewise_2seg(x, mid, x_lo, x_hi, y_lo, y_mid, y_hi): + """ + Piecewise linear passing through: + (x=mid, y=y_mid) + and reaching: + (x=x_lo, y=y_lo) for x <= mid + (x=x_hi, y=y_hi) for x >= mid + """ + y_left = lerp( + x, mid, x_lo, y_mid, y_lo + ) # x in [x_lo, mid] (or beyond -> clamped) + y_right = lerp( + x, mid, x_hi, y_mid, y_hi + ) # x in [mid, x_hi] (or beyond -> clamped) + return torch.where(x <= mid, y_left, y_right) + + # make desired match (N,2) like the actual joint tensor + def _match_legs(desired, actual_legs): + # actual_legs: (N,2) + if desired.dim() == 0: + return desired.view(1, 1).expand_as(actual_legs) + if desired.dim() == 1: + return desired.unsqueeze(1).expand_as(actual_legs) # (N,) -> (N,2) + if desired.dim() == 2: + # if already (N,2) it's good + return desired + raise RuntimeError(f"Unexpected desired shape: {tuple(desired.shape)}") + + # Hind constraints (RH, LH) + hfe_hind = self.dof_pos[:, self.idx["hind_hfe"]] # [N,2] + kfe_hind = self.dof_pos[:, self.idx["hind_kfe"]] + pfe_hind = self.dof_pos[:, self.idx["hind_pfe"]] + + # 3) hind hfe: 0 -> -1.5 => kfe: 0 -> +1.0, pfe: 0 -> -1.2 + # 4) hind hfe: 0 -> +0.5 => kfe: 0 -> -0.2, pfe: 0 -> -0.5 + kfe_hind_des = piecewise_2seg( + hfe_hind, + mid=torch.zeros_like(hfe_hind), # at hfe=0 + x_lo=-1.5 * torch.ones_like(hfe_hind), + x_hi=+0.5 * torch.ones_like(hfe_hind), + y_lo=+1.0 * torch.ones_like(hfe_hind), + y_mid=0.0 * torch.ones_like(hfe_hind), + y_hi=-0.2 * torch.ones_like(hfe_hind), + ) + + pfe_hind_des = piecewise_2seg( + hfe_hind, + mid=torch.zeros_like(hfe_hind), + x_lo=-1.5 * torch.ones_like(hfe_hind), + x_hi=+0.5 * torch.ones_like(hfe_hind), + y_lo=-1.2 * torch.ones_like(hfe_hind), + y_mid=0.0 * torch.ones_like(hfe_hind), + y_hi=-0.5 * torch.ones_like(hfe_hind), + ) + + # Front constraints (RF, LF) + hfe_front = self.dof_pos[:, self.idx["front_hfe"]] # [N,2] + kfe_front = self.dof_pos[:, self.idx["front_kfe"]] + pfe_front = self.dof_pos[:, self.idx["front_pfe"]] + + # 5) front hfe: 0 -> +0.6 => kfe: 0 -> 0, pfe: 0 -> 0 + # 6) front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +1.5 + # 7) front hfe: 0 -> -1.0 => kfe: 0 -> -1.5, pfe: 0 -> +3.0 + + zeros = torch.zeros_like(hfe_front) + ones = torch.ones_like(hfe_front) + + # kfe: flat 0 for hfe in [0, +0.6], ramp to -1.5 for hfe in [-1.0, 0] + kfe_front_des = piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=-1.5 * ones, # at hfe=-1.0 + y_mid=0.0 * ones, # at hfe=0 + y_hi=0.0 * ones, # at hfe=+0.6 + ) + + # pfe: flat 0 for hfe in [0, +0.6], ramp to +1.5 for hfe in [-1.0, 0] + pfe_front_des_15 = piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=+1.5 * ones, # at hfe=-1.0 + y_mid=0.0 * ones, # at hfe=0 + y_hi=0.0 * ones, # at hfe=+0.6 + ) + + # pfe: flat 0 for hfe in [0, +0.6], ramp to +3.0 for hfe in [-1.0, 0] + pfe_front_des_30 = piecewise_2seg( + hfe_front, + mid=zeros, + x_lo=-1.0 * ones, + x_hi=+0.6 * ones, + y_lo=+3.0 * ones, # at hfe=-1.0 + y_mid=0.0 * ones, # at hfe=0 + y_hi=0.0 * ones, # at hfe=+0.6 + ) + + # --- HIND --- + kfe_hind_des_ = _match_legs(kfe_hind_des, kfe_hind) # -> (N,2) + pfe_hind_des_ = _match_legs(pfe_hind_des, 
pfe_hind) # -> (N,2) + hind_pen = (kfe_hind - kfe_hind_des_) ** 2 + ( + pfe_hind - pfe_hind_des_ + ) ** 2 # (N,2) + + # --- FRONT --- + # KFE: flat 0 for hfe in [0, 0.6], ramp to -1.5 for hfe in [-1,0] + kfe_front_des_ = _match_legs(kfe_front_des, kfe_front) # -> (N,2) + kfe_err = (kfe_front - kfe_front_des_) ** 2 # (N,2) + + # negative HFE: + # - PFE: 0 -> +1.5 (hfe 0 -> -1) + # - PFE: 0 -> +3.0 (hfe 0 -> -1) + pfe_front_des15_ = _match_legs(pfe_front_des_15, pfe_front) # -> (N,2) + pfe_front_des30_ = _match_legs(pfe_front_des_30, pfe_front) # -> (N,2) + + pfe_err15 = (pfe_front - pfe_front_des15_) ** 2 + pfe_err30 = (pfe_front - pfe_front_des30_) ** 2 + + # BOTH branches acceptable always: + pfe_term = torch.minimum(pfe_err15, pfe_err30) # (N,2) + + front_pen = kfe_err + pfe_term # (N,2) + + # --- aggregate per env --- + pen = hind_pen.mean(dim=1) + front_pen.mean(dim=1) # (N,) + return -pen + + def _reset_to_custom_pos(self, env_ids, reset_h): + env_ids = env_ids.to(dtype=torch.long, device=self.device) + B = env_ids.numel() + if B == 0: + return + + # --- normalize reset_h to (B,) on device --- + if not torch.is_tensor(reset_h): + reset_h = torch.tensor( + reset_h, device=self.device, dtype=torch.float32 + ).repeat(B) + else: + reset_h = reset_h.to(device=self.device, dtype=torch.float32) + if reset_h.numel() == 1: + reset_h = reset_h.repeat(B) + elif reset_h.shape[0] != B: + # if passed full (num_envs,), subset it + if reset_h.shape[0] == self.num_envs: + reset_h = reset_h[env_ids] + else: + raise RuntimeError( + f"reset_h shape {reset_h.shape} incompatible env_id batch {B}" + ) + + # env_ids must be LongTensor on device + env_ids = env_ids.to(device=self.device, dtype=torch.long) + + # Get a per-env base pose, regardless of how default_dof_pos is stored + ddp = self.default_dof_pos + + if ddp.dim() == 1: + # (num_dofs,) -> make (B, num_dofs) + base_pose = ddp.unsqueeze(0).repeat(env_ids.numel(), 1).clone() + elif ddp.dim() == 2 and ddp.shape[0] == 1: + # (1, num_dofs) -> expand to (B, num_dofs) + base_pose = ddp.expand(env_ids.numel(), -1).clone() + elif ddp.dim() == 2 and ddp.shape[0] == self.num_envs: + # (num_envs, num_dofs) -> index by env ids + base_pose = ddp[env_ids].clone() + else: + raise RuntimeError(f"Unexpected default_dof_pos shape: {tuple(ddp.shape)}") + + # Decide stand vs lie by z pos + h_mid = 0.7 + + use_stand = reset_h >= h_mid # (B,) bool + + stand_pose = base_pose.clone() + lie_pose = base_pose.clone() + + # DOF map: 0 base_joint + # RH: 1..5, LH: 6..10, RF: 11..15, LF: 16..20 (haa,hfe,kfe,pfe,pastern) + + pos_range = self.cfg.init_state.dof_pos_range + + for base in [1, 6, 11, 16]: + haa = base + 0 + hfe = base + 1 + kfe = base + 2 + pfe = base + 3 + pas = base + 4 + + # standing (tune) + stand_pose[:, haa] = pos_range["haa"][0] + ( + pos_range["haa"][1] - pos_range["haa"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, hfe] = pos_range["hfe"][0] + ( + pos_range["hfe"][1] - pos_range["hfe"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, kfe] = pos_range["kfe"][0] + ( + pos_range["kfe"][1] - pos_range["kfe"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, pfe] = pos_range["pfe"][0] + ( + pos_range["pfe"][1] - pos_range["pfe"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + stand_pose[:, pas] = pos_range["pastern_to_foot"][0] + ( + pos_range["pastern_to_foot"][1] - pos_range["pastern_to_foot"][0] + ) * torch.rand((lie_pose.shape[0],), device=self.device) + + 
# front kneel (tune) + for base in [11, 16]: + haa = base + 0 + hfe = base + 1 + kfe = base + 2 + pfe = base + 3 + pas = base + 4 + lie_pose[:, haa] = 0.0 + lie_pose[:, hfe] = -1.0 + (0.2 - -1.0) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, kfe] = -1.5 + (-0.5 - -1.5) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pfe] = -0.3 + (3.0 - -0.3) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pas] = -0.3 + (1.8 - -0.3) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + + # hind tuck (tune) + for base in [1, 6]: + haa = base + 0 + hfe = base + 1 + kfe = base + 2 + pfe = base + 3 + pas = base + 4 + lie_pose[:, haa] = 0.0 + lie_pose[:, hfe] = -1.5 + (-1 - -1.5) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, kfe] = -0.2 + (1 - -0.2) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pfe] = 0.0 + (-1.2 - 0.0) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + lie_pose[:, pas] = -0.3 + (1.8 - -0.3) * torch.rand( + (lie_pose.shape[0],), device=self.device + ) + + new_pose = torch.where(use_stand.unsqueeze(1), stand_pose, lie_pose) + + # clamp to limits (dof_pos_limits: (num_dof, 2)) + lo = self.dof_pos_limits[:, 0].unsqueeze(0) + hi = self.dof_pos_limits[:, 1].unsqueeze(0) + new_pose = torch.max(torch.min(new_pose, hi), lo) + + # apply to buffers + self.dof_pos[env_ids] = new_pose + self.dof_vel[env_ids] = 0.0 + + if ( + hasattr(self, "dof_state") + and self.dof_state.dim() == 3 + and self.dof_state.shape[2] >= 2 + ): + self.dof_state[env_ids, :, 0] = new_pose + self.dof_state[env_ids, :, 1] = 0.0 diff --git a/gym/envs/horse/horse_osc_config.py b/gym/envs/horse/horse_osc_config.py new file mode 100644 index 00000000..436af043 --- /dev/null +++ b/gym/envs/horse/horse_osc_config.py @@ -0,0 +1,312 @@ +from gym.envs.horse.horse_config import ( + HorseCfg, + HorseRunnerCfg, +) + +BASE_HEIGHT_REF = 1.3 + + +class HorseOscCfg(HorseCfg): + class env(HorseCfg.env): + num_envs = 2**12 + num_actuators = 1 + 4 * 5 + episode_length_s = 10 + + class terrain(HorseCfg.terrain): + mesh_type = "plane" + # mesh_type = 'trimesh' # none, plane, heightfield or trimesh + + class init_state(HorseCfg.init_state): + timeout_reset_ratio = 0.75 + reset_mode = "reset_to_range" + # * default COM for basic initialization + pos = [0.0, 0.0, 1.35] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + default_joint_angles = { + "haa": 0.0, + "hfe": 0.0, + "kfe": 0.0, + "pfe": 0.0, + "pastern_to_foot": 0.0, + "base_joint": 0.0, + } + + # * initialization for random range setup + # these are the physical limits in the URDF as of 11 Dec 2025 + # dof_pos_range = { + # haa": [-0.2, 0.2], + # f_hfe": [-1.0, 0.6], + # h_hfe": [-1.5, 0.5], + # f_kfe": [-1.5, 0.1], + # h_kfe": [-0.2, 1.0], + # f_pfe": [-0.3, 3.0], + # h_pfe": [-1.2, 2.5], + # f_pastern_to_foot": [-0.3, 1.8], + # h_pastern_to_foot": [-0.3, 1.8], + # base_joint": [-0.2, 0.2], + # } + dof_pos_range = { + "haa": [-0.2, 0.2], + "hfe": [-1.0, 0.5], + "kfe": [-0.2, 0.1], + "pfe": [-0.3, 2.5], + "pastern_to_foot": [-0.3, 1.8], + "base_joint": [-0.0, 0.0], + } + dof_vel_range = { + "haa": [-0.2, 0.2], + "hfe": [-0.2, 0.2], + "kfe": [-0.2, 0.2], + "pfe": [-0.2, 0.2], + "pastern_to_foot": [-0.2, 0.2], + "base_joint": [-0.2, 0.2], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [0.6, 1.35], # z + [-0.0, 0.0], # roll + 
[-0.0, 0.0], # pitch + [-0.2, 0.2], # yaw + ] + root_vel_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [0.0, 0.0], # z + [0.0, 0.0], # roll + [0.0, 0.0], # pitch + [0.0, 0.0], # yaw + ] + + class control(HorseCfg.control): + # * PD Drive parameters: + # HorseOscCfg.control + stiffness = { + "haa": 75, + "hfe": 40, + "kfe": 38, + "pfe": 10, + "pastern_to_foot": 5, + "base_joint": 50, # still needs modifying + } + damping = { + "haa": 0.0001, + "hfe": 0.0001, + "kfe": 0.0001, + "pfe": 0.5, + "pastern_to_foot": 0.5, + "base_joint": 10, + } + ctrl_frequency = 250 # how often the PDF controller/action updates run + desired_sim_frequency = 1000 # how often the physics is calculated + + class osc: # <-------------------most likely needs tuning + process_noise_std = 0.25 + grf_threshold = 0.1 # 20. # Normalized to body weight + # oscillator parameters + omega = 3 # gets overwritten + coupling = 1 # gets overwritten + osc_bool = False # not used in paper + grf_bool = False # not used in paper + randomize_osc_params = False + omega_range = [1.0, 4.0] # [0.0, 10.] + coupling_range = [0.0, 1.0] + offset_range = [0.0, 0.0] + stop_threshold = 0.5 + omega_stop = 1.0 + omega_step = 2.0 + omega_slope = 1.0 + omega_max = 4.0 + # coupling_step = 0. + # coupling_stop = 0. + coupling_stop = 4.0 + coupling_step = 1.0 + coupling_slope = 0.0 + coupling_max = 1.0 + offset = 1.0 + + init_to = "random" + + class commands: + resampling_time = 3.0 # * time before command are changed[s] + var = 1.0 + + class ranges: + lin_vel_x = [-1.0, 0.0, 1.0, 3.0, 6.0] # min max [m/s] + lin_vel_y = 1.0 # max [m/s] + yaw_vel = 6 # max [rad/s] + height = [0.0, 1.30] # m, command range of height during training + + class push_robots: + toggle = False + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.3, 0.1, 0.1] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.4, 1.0] + randomize_base_mass = False + lower_mass_offset = -0.5 # kg + upper_mass_offset = 2.0 + lower_z_offset = 0.0 # m + upper_z_offset = 0.2 + lower_x_offset = 0.0 + upper_x_offset = 0.0 + + class asset(HorseCfg.asset): + shank_length_diff = 0 # Units in cm + file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "horse/urdf/horse.urdf" + foot_name = "foot" + penalize_contacts_on = ["thigh"] # "thigh", "shank", "pastern" + terminate_after_contacts_on = ["top"] + collapse_fixed_joints = False + fix_base_link = False + self_collisions = 1 + flip_visual_attachments = False + disable_gravity = False + disable_motors = False + joint_damping = 0.3 + + class reward_settings(HorseCfg.reward_settings): + soft_dof_pos_limit = 0.9 + soft_dof_vel_limit = 0.9 + soft_torque_limit = 0.9 + max_contact_force = 2000.0 # testing, this was too low for a horse weight + base_height_target = BASE_HEIGHT_REF + 0.03 + tracking_sigma = 0.25 + switch_scale = 0.5 + + class scaling(HorseCfg.scaling): + base_ang_vel = [0.3, 0.3, 0.1] + base_lin_vel = BASE_HEIGHT_REF + dof_vel = [2.0] + (4 * [0.5, 0.5, 0.5, 0.5, 0.5]) + base_height = BASE_HEIGHT_REF + dof_pos = [0.2] + ( + 4 * [0.4, 1.3, 1.4, 2.5, 2.1] + ) # reducing this to be alot smaller 2.1 or 2.5 + dof_pos_obs = dof_pos + # dof_pos_target = [2.0 * x for x in dof_pos] # target joint angles + dof_pos_target = [0.4] + (4 * [0.8, 2.6, 2.8, 5.0, 4.2]) + tau_ff = [1100] + (4 * [1000, 1000, 1000, 500, 300]) # not being used + commands = [3, 1, 3, 1] # add height as a command + + +class HorseOscRunnerCfg(HorseRunnerCfg): + seed = -1 + runner_class_name = "OnPolicyRunner" + + class actor(HorseRunnerCfg.actor): + 
hidden_dims = [256, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "dof_pos_target", + # "osc_omega", + # "osc_coupling" + # "oscillators_vel", + # "grf", + ] + normalize_obs = True + actions = ["dof_pos_target"] + add_noise = True + disable_actions = False + + class noise: + scale = 1.0 + dof_pos_obs = 0.01 + base_ang_vel = 0.01 + dof_pos = 0.005 + dof_vel = 0.005 + lin_vel = 0.05 + ang_vel = [0.3, 0.15, 0.4] + gravity_vec = 0.1 + + class critic(HorseRunnerCfg.critic): + hidden_dims = [128, 64] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "oscillator_obs", + "oscillators_vel", + "dof_pos_target", + ] + normalize_obs = True + + class reward: + class weights: + tracking_lin_vel = 4.0 + tracking_ang_vel = 2.0 + lin_vel_z = 0.0 + ang_vel_xy = 0.01 + orientation = 1.0 + torques = 5.0e-10 + dof_vel = 0.0 + # min_base_height = 1.0 + action_rate = 1e-5 + action_rate2 = 1e-6 + stand_still = 0.0 + dof_pos_limits = 0.0 + feet_contact_forces = 0.0 + dof_near_home = 0.0 + swing_grf = 5.0 + stance_grf = 5.0 + swing_velocity = 0.0 + stance_velocity = 0.0 + coupled_grf = 1.0 # penalize for grf during swing, no grf during stance + enc_pace = 0.0 + cursorial = 1.0 # enourage legs to stay under body, don't splay out + standing_torques = 0.0 # 1.e-5 + tracking_height = 5.0 + tendon_constraints = 1.0 + + class termination_weight: + termination = 15.0 / 100.0 + + class algorithm: + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-4, 1e-2] + lr_ratio = 1.3 + + class runner(HorseRunnerCfg.runner): + run_name = "" + experiment_name = "horse_osc" + max_iterations = 1000 + algorithm_class_name = "PPO2" + num_steps_per_env = 32 diff --git a/gym/envs/mini_cheetah/mini_cheetah_config.py b/gym/envs/mini_cheetah/mini_cheetah_config.py index 3dc1507f..7608e080 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_config.py @@ -119,7 +119,7 @@ class scaling(LeggedRobotCfg.scaling): dof_pos_obs = dof_pos dof_pos_target = 4 * [0.2, 0.3, 0.3] tau_ff = 4 * [18, 18, 28] - commands = [3, 1, 3] + commands = [3, 1, 3, 1] class MiniCheetahRunnerCfg(LeggedRobotRunnerCfg): diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc.py b/gym/envs/mini_cheetah/mini_cheetah_osc.py index 77866ca3..f2b64409 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc.py @@ -1,5 +1,4 @@ import torch -import numpy as np from isaacgym.torch_utils import torch_rand_float from gym.envs.mini_cheetah.mini_cheetah import MiniCheetah @@ -129,31 +128,6 @@ def compute_osc_slope(self): self.osc_omega = torch.clamp_min(self.osc_omega, 0.1) self.osc_coupling = torch.clamp_min(self.osc_coupling, 0) - def _process_rigid_body_props(self, props, env_id): - if env_id == 0: - # * init buffers for the domain rand changes - self.mass = torch.zeros(self.num_envs, 1, device=self.device) - self.com = 
torch.zeros(self.num_envs, 3, device=self.device) - - # * randomize mass - if self.cfg.domain_rand.randomize_base_mass: - lower = self.cfg.domain_rand.lower_mass_offset - upper = self.cfg.domain_rand.upper_mass_offset - # self.mass_ - props[0].mass += np.random.uniform(lower, upper) - self.mass[env_id] = props[0].mass - # * randomize com position - lower = self.cfg.domain_rand.lower_z_offset - upper = self.cfg.domain_rand.upper_z_offset - props[0].com.z += np.random.uniform(lower, upper) - self.com[env_id, 2] = props[0].com.z - - lower = self.cfg.domain_rand.lower_x_offset - upper = self.cfg.domain_rand.upper_x_offset - props[0].com.x += np.random.uniform(lower, upper) - self.com[env_id, 0] = props[0].com.x - return props - def _post_decimation_step(self): """Update all states that are not handled in PhysX""" super()._post_decimation_step() @@ -174,19 +148,13 @@ def _step_oscillators(self, dt=None): ) grf = self._compute_grf() self.oscillators_vel = self.osc_omega - grf * local_feedback - # self.oscillators_vel *= torch_rand_float(0.9, - # 1.1, - # self.oscillators_vel.shape, - # self.device) self.oscillators_vel += ( torch.randn(self.oscillators_vel.shape, device=self.device) * self.cfg.osc.process_noise_std ) self.oscillators_vel *= 2 * torch.pi - self.oscillators += ( - self.oscillators_vel * dt - ) # torch.clamp(self.oscillators_vel * dt, min=0) + self.oscillators += self.oscillators_vel * dt self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) self.oscillator_obs = torch.cat( (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 @@ -214,17 +182,6 @@ def _resample_commands(self, env_ids): torch.randn((len(env_ids), 1), device=self.device) * self.cfg.commands.var ) - # possible_commands = torch.tensor(self.command_ranges["lin_vel_y"], - # device=self.device) - # self.commands[env_ids, 1:2] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - # possible_commands = torch.tensor(self.command_ranges["yaw_vel"], - # device=self.device) - # self.commands[env_ids, 0:1] = possible_commands[torch.randint( - # 0, len(possible_commands), (len(env_ids), 1), - # device=self.device)] - if 0 in self.cfg.commands.ranges.lin_vel_x: # * with 20% chance, reset to 0 commands except for forward self.commands[env_ids, 1:] *= ( diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py index 2c32f06b..e8796147 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py @@ -163,7 +163,7 @@ class scaling(MiniCheetahCfg.scaling): dof_pos_target = 4 * [0.2, 0.3, 0.3] tau_ff = 4 * [18, 18, 28] # hip-abad, hip-pitch, knee # commands = [base_lin_vel, base_lin_vel, base_ang_vel] - commands = [3, 1, 3] # [base_lin_vel, base_lin_vel, base_ang_vel] + commands = [3, 1, 3, 1] # [base_lin_vel, base_lin_vel, base_ang_vel] class MiniCheetahOscRunnerCfg(MiniCheetahRunnerCfg): diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref.py b/gym/envs/mini_cheetah/mini_cheetah_ref.py index 57e1f0e7..a1a3ca13 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref.py @@ -59,8 +59,7 @@ def _switch(self): def _reward_swing_grf(self): """Reward non-zero grf during swing (0 to pi)""" in_contact = torch.gt( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), - 50.0, + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), 50.0 ) ph_off = torch.lt(self.phase, torch.pi) rew = 
in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) @@ -69,8 +68,7 @@ def _reward_stance_grf(self): """Reward non-zero grf during stance (pi to 2pi)""" in_contact = torch.gt( - torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), - 50.0, + torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1), 50.0 ) ph_off = torch.gt(self.phase, torch.pi) # should this be in swing? rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) diff --git a/gym/utils/interfaces/KeyboardInterface.py b/gym/utils/interfaces/KeyboardInterface.py index d2e3ce99..ce740f50 100644 --- a/gym/utils/interfaces/KeyboardInterface.py +++ b/gym/utils/interfaces/KeyboardInterface.py @@ -15,6 +15,10 @@ def __init__(self, env): env.gym.subscribe_viewer_keyboard_event( env.viewer, gymapi.KEY_SPACE, "space_shoot" ) + env.gym.subscribe_viewer_keyboard_event(env.viewer, gymapi.KEY_UP, "height_up") + env.gym.subscribe_viewer_keyboard_event( + env.viewer, gymapi.KEY_DOWN, "height_down" + ) env.gym.subscribe_viewer_mouse_event( env.viewer, gymapi.MOUSE_LEFT_BUTTON, "mouse_shoot" ) @@ -24,6 +28,7 @@ def __init__(self, env): print("WASD: forward, strafe left, backward, strafe right") print("QE: yaw left/right") print("R: reset environments") + print("Arrow Up/Down: stand up/lay down") print("ESC: quit") print("______________________________________________________________") @@ -39,6 +44,10 @@ def __init__(self, env): self.max_vel_yaw = 2.0 self.increment_yaw = self.max_vel_yaw * 0.2 + self.min_height = -1000 + self.max_height = 1.3 + self.increment_height = 0.1 + def update(self, env): for evt in env.gym.query_viewer_action_events(env.viewer): if evt.value == 0: @@ -73,6 +82,16 @@ def update(self, env): env.commands[:, 2] + self.increment_yaw, max=self.max_vel_yaw, ) + elif evt.action == "height_up": + env.commands[:, 3] = torch.clamp( + env.commands[:, 3] + self.increment_height, + max=self.max_height, + ) + elif evt.action == "height_down": + env.commands[:, 3] = torch.clamp( + env.commands[:, 3] - self.increment_height, + min=self.min_height, + ) elif evt.action == "QUIT": env.exit = True elif evt.action == "RESET": diff --git a/resources/robots/horse/urdf/horse.urdf b/resources/robots/horse/urdf/horse.urdf new file mode 100644 index 00000000..22d3eb3f --- /dev/null +++ b/resources/robots/horse/urdf/horse.urdf @@ -0,0 +1,605 @@ [605 added lines: URDF XML markup not preserved in this extract] \ No newline at end of file
diff --git a/scripts/analyze_joint_scaling.py b/scripts/analyze_joint_scaling.py new file mode 100644 index 00000000..1de90d03 --- /dev/null +++ b/scripts/analyze_joint_scaling.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import numpy as np +import os +import matplotlib.pyplot as plt +import csv + +OUT_DIR = "scaling_analysis" +os.makedirs(OUT_DIR, exist_ok=True) + +LOG_FILE = "joint_logs.npz" + +# load data +d = np.load(LOG_FILE, allow_pickle=True) + +actual = d["actual_pos"] # [envs, steps, joints] +target = d["target_pos"] +joint_names = d["joint_name"] + +num_envs, total_steps, num_joints = actual.shape + +# episode length +if "step" in d: + step_arr = d["step"] + valid = np.where(step_arr != 0)[0] + actual_steps = valid[-1] + 1 if len(valid) > 0 else 1 +else: + tmp = target[0] + nz = np.where(np.any(tmp != 0, axis=1))[0] + actual_steps = nz[-1] + 1 + +print(f"Trimming timestep dimension from {total_steps} -> {actual_steps}") + +# trim arrays +actual = actual[:, :actual_steps, :] +target = target[:, :actual_steps, :] + +# raw error +error = target - actual # [envs, steps, joints] +abs_error = np.abs(error) + +# shape: (num_envs * steps, num_joints) +flat_err = abs_error.reshape(-1, num_joints) + +# mean absolute error for each joint +mae = np.mean(flat_err, axis=0) + +# standard deviation of absolute error for each joint +std_err = np.std(flat_err, axis=0) + +# csv stats +csv_path = os.path.join(OUT_DIR, "error_summary.csv") +with open(csv_path, "w", newline="") as f: + w = csv.writer(f) + w.writerow(["joint_idx", "joint_name", "MAE_all_envs", "STD_all_envs"]) + for j in range(num_joints): + w.writerow( + [ + j, + joint_names[j], + float(mae[j]), + float(std_err[j]), + ] + ) + +print(f"Saved raw error summary (all envs): {csv_path}") + +# histogram with all env +all_errors = error.reshape(-1, num_joints) + +for j in range(num_joints): + name = joint_names[j] + plt.figure(figsize=(10, 4)) + plt.hist(all_errors[:, j], bins=80) + plt.title(f"{name} – Raw Error Histogram (All Envs Combined)") + plt.xlabel("target_pos - actual_pos") + plt.ylabel("count") + plt.grid(True, alpha=0.3) + + out = os.path.join(OUT_DIR, f"{j:02d}_{name}_hist.png") + plt.savefig(out, dpi=200) + plt.close() + +print(f"Saved histograms in: {OUT_DIR}") diff --git a/scripts/play.py b/scripts/play.py index 833e6641..46d3bb46 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -5,6 +5,10 @@ # torch needs to be imported after isaacgym imports in local source import torch +import numpy as np +import os + +BASE_HEIGHT_REF = 1.3 def setup(args): @@ -29,7 +33,120 @@ def setup(args): return env, runner, train_cfg +def create_obs_logging_dict(env, obs_vars, num_steps): + """ + Create a dictionary to log raw and scaled observation data. + + Returns: + obs_log: dict with keys: + {var}_raw -> [num_envs, num_steps, ...] + {var}_scaled -> [num_envs, num_steps, ...] + """ + num_envs = env.num_envs + obs_log = {} + + for var in obs_vars: + val = getattr(env, var) + shape = (num_envs, num_steps) + val.shape[1:] # preserve timestep array shape + + # allocate tensors + obs_log[f"{var}_raw"] = torch.zeros(shape, device=val.device) + obs_log[f"{var}_scaled"] = torch.zeros_like(obs_log[f"{var}_raw"]) + + return obs_log + + +def log_obs_step(env, obs_log, obs_vars, step_idx): + for var in obs_vars: + obs_log[f"{var}_raw"][:, step_idx, ...] = getattr(env, var) + obs_log[f"{var}_scaled"][:, step_idx, ...]
= env.get_state(var) + + +def create_logging_dict(env, num_steps): + # creates a dictionary of tensors to store joint data for each timestep. + num_envs = env.num_envs + num_dofs = env.num_dof + + log_dict = { + "step": torch.zeros((num_steps,), dtype=torch.int32, device=env.device), + "target_pos": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + "actual_pos": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + "torque": torch.zeros((num_envs, num_steps, num_dofs), device=env.device), + } + + # Get joint names + joint_names = ( + env.dof_names + if hasattr(env, "dof_names") + else [f"joint_{i}" for i in range(num_dofs)] + ) + log_dict["joint_name"] = joint_names + + return log_dict + + def play(env, runner, train_cfg): + num_steps = int(env.max_episode_length) + log_data = create_logging_dict(env, num_steps) + + obs_vars = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_target", + ] + + obs_log = create_obs_logging_dict(env, obs_vars, num_steps) + + # --- height reward logging --- + height_rew = np.zeros((num_steps,), dtype=np.float32) + height_actual = np.zeros((num_steps,), dtype=np.float32) + height_target = np.zeros((num_steps,), dtype=np.float32) + + # ---------- record init pos/height ---------- + OUT_DIR = "play_logs" + os.makedirs(OUT_DIR, exist_ok=True) + + # num dofs (don't rely on env.num_dofs, use tensor shape) + num_dofs = env.dof_pos.shape[1] + + # Capture *one-time* init snapshot for all envs + # root z: prefer root_states[:,2] if it exists; otherwise fall back to base_height + if hasattr(env, "root_states"): + init_root_z = env.root_states[:, 2].detach().cpu().numpy().astype(np.float32) + else: + init_root_z = env.base_height.detach().cpu().numpy().astype(np.float32) + + # commanded height at start + init_cmd_h = env.commands[:, 3].detach().cpu().numpy().astype(np.float32) + # starting joint positions + init_dof_pos = env.dof_pos.detach().cpu().numpy().astype(np.float32) + + # Build a single 2D table: (num_envs, 2 + num_dofs) + # col0 = init_root_z, col1 = init_cmd_h, col2.. 
= dof_pos + init_table = np.zeros((env.num_envs, 2 + num_dofs), dtype=np.float32) + init_table[:, 0] = init_root_z + init_table[:, 1] = init_cmd_h + init_table[:, 2:] = init_dof_pos + + # Column names/joint names + if hasattr(env, "dof_names"): + dof_names = list(env.dof_names) + else: + # fallback: generic names + dof_names = [f"dof_{i}" for i in range(num_dofs)] + + col_names = ["init_root_z", "init_cmd_height"] + [f"init_{n}" for n in dof_names] + + # track actual number of simulation steps + actual_steps = 0 + # track and print commanded height changes + last_height_cmd = None + # * set up recording if env.cfg.viewer.record: recorder = VisualizationRecorder( @@ -41,18 +158,104 @@ def play(env, runner, train_cfg): if COMMANDS_INTERFACE: # interface = GamepadInterface(env) interface = KeyboardInterface(env) - for i in range(10 * int(env.max_episode_length)): - if COMMANDS_INTERFACE: - interface.update(env) - if env.cfg.viewer.record: - recorder.update(i) - runner.set_actions( - runner.actor_cfg["actions"], - runner.get_inference_actions(), - runner.actor_cfg["disable_actions"], + + # resample height command only + h_min, h_max = env.cfg.commands.ranges.height + env.commands[:, 3] = h_min + (h_max - h_min) * torch.rand( + (env.num_envs,), device=env.device ) - env.step() - env.check_exit() + + try: + for i in range(10 * int(env.max_episode_length)): + if COMMANDS_INTERFACE: + interface.update(env) + if env.cfg.viewer.record: + recorder.update(i) + + # print target/actual height (m) + target_height = env.commands[0, 3].item() + if (last_height_cmd is None) or ( + abs(target_height - last_height_cmd) > 1e-6 + ): + actual_height = env.base_height[0].item() + + print( + f"[HEIGHT] actual = {actual_height:.3f} m | " + f"target = {target_height:.3f} m | " + f"error = {actual_height - target_height:.3f} | " + f"descend mode = {(env.commands[:, 3][0]) < 1.0}" + ) + last_height_cmd = target_height + + runner.set_actions( + runner.actor_cfg["actions"], + runner.get_inference_actions(), + runner.actor_cfg["disable_actions"], + ) + env.step() + + # log only actual simulation steps + if actual_steps < num_steps: + # log joints + log_data["step"][actual_steps] = actual_steps + log_data["target_pos"][:, actual_steps, :] = env.dof_pos_target + log_data["actual_pos"][:, actual_steps, :] = env.dof_pos + log_data["torque"][:, actual_steps, :] = env.torques + + # log observations + log_obs_step(env, obs_log, obs_vars, actual_steps) + + # _reward_tracking_height returns (num_envs,) tensor + r = env._reward_tracking_height() # torch tensor + height_rew[actual_steps] = r[0].item() + height_actual[actual_steps] = env.base_height[0].item() + height_target[actual_steps] = env.commands[0, 3].item() + + actual_steps += 1 + + env.check_exit() # user exit or viewer closed + + except KeyboardInterrupt: + print("\n[INFO] Interrupted by user, saving logs...") + except SystemExit: + print("\n[INFO] Viewer closed, saving logs...") + finally: + # slice to actual steps before saving + log_data_cpu = { + k: (v.detach().cpu().numpy() if torch.is_tensor(v) else v) + for k, v in log_data.items() + } + for key in ["step", "target_pos", "actual_pos", "torque"]: + log_data_cpu[key] = log_data_cpu[key][:actual_steps] + + np.savez_compressed("joint_logs.npz", **log_data_cpu) + print(f"\nSaved joint log to joint_logs.npz ({actual_steps} steps)") + + obs_log_cpu = { + k: (v.detach().cpu().numpy() if torch.is_tensor(v) else v) + for k, v in obs_log.items() + } + for key in obs_log_cpu.keys(): + obs_log_cpu[key] = obs_log_cpu[key][:, 
:actual_steps, ...] + + np.savez_compressed("obs_logs.npz", **obs_log_cpu) + print(f"\nSaved obs log to obs_logs.npz ({actual_steps} steps)") + + # ---------- save init data ---------- + init_npz_path = os.path.join(OUT_DIR, "init_snapshot.npz") + np.savez_compressed( + init_npz_path, + init_table=init_table, + col_names=np.array(col_names, dtype=object), + ) + print(f"[saved] init snapshot -> {init_npz_path}") + + # save as csv + init_csv_path = os.path.join(OUT_DIR, "init_snapshot.csv") + with open(init_csv_path, "w") as f: + f.write(",".join(col_names) + "\n") + np.savetxt(f, init_table, delimiter=",", fmt="%.6f") + print(f"[saved] init snapshot csv -> {init_csv_path}") if __name__ == "__main__": diff --git a/scripts/plot_height_rewards.py b/scripts/plot_height_rewards.py new file mode 100644 index 00000000..967bf178 --- /dev/null +++ b/scripts/plot_height_rewards.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import numpy as np +import matplotlib.pyplot as plt + +LOG_FILE = "height_reward_logs.npz" +OUT_PNG = "height_reward_plot.png" + +d = np.load(LOG_FILE) +step = d["step"] +height_rew = d["height_rew"] +h_actual = d["height_actual"] +h_target = d["height_target"] + +plt.figure(figsize=(12, 5)) + +plt.plot(step, h_actual, label="actual height (m)") +plt.plot(step, h_target, label="target height (m)") +plt.plot(step, height_rew, label="_reward_tracking_height") + +plt.xlabel("step") +plt.ylabel("value") +plt.title("Height Tracking + Reward (Same Axis)") +plt.grid(True, alpha=0.3) +plt.legend() +plt.tight_layout() +plt.savefig(OUT_PNG, dpi=200) + +print(f"Saved plot -> {OUT_PNG}") diff --git a/scripts/plot_logs_by_joint.py b/scripts/plot_logs_by_joint.py new file mode 100644 index 00000000..a2952bf2 --- /dev/null +++ b/scripts/plot_logs_by_joint.py @@ -0,0 +1,244 @@ +import numpy as np +import matplotlib.pyplot as plt +import os +import re + +# config +LOG_FILE = "joint_logs.npz" +ENV_ID = 0 +SAVE_DIR = "plots_by_joint" + +# output folder +os.makedirs(SAVE_DIR, exist_ok=True) + +# joint limits +JOINT_LIMITS = { + r".*haa": [-0.2, 0.2], + r".*f_hfe": [-1.0, 0.6], + r".*h_hfe": [-1.5, 0.5], + r".*f_kfe": [-1.5, 0.1], + r".*h_kfe": [-0.2, 1.0], + r".*f_pfe": [-0.3, 3.0], + r".*h_pfe": [-1.2, 2.5], + r".*f_pastern_to_foot": [-0.3, 1.8], + r".*h_pastern_to_foot": [-0.3, 1.8], + r".*base_joint": [-0.2, 0.2], +} + + +def find_joint_limits(joint_name: str): + for pattern, limits in JOINT_LIMITS.items(): + if re.match(pattern, joint_name): + return limits + return None + + +data = np.load(LOG_FILE, allow_pickle=True) +steps = data["step"] +target_pos = data["target_pos"] +actual_pos = data["actual_pos"] +torques = data["torque"] +joint_names = data["joint_name"] + +num_envs, num_steps, num_joints = actual_pos.shape +print(f"Loaded log for {num_joints} joints, {num_steps} steps.") + +pattern = re.compile(r".*(rh|lh|rf|lf)_(haa|hfe|kfe|pfe|pastern_to_foot)$") + +joint_map = {} +base_joints = {} + +for idx, name in enumerate(joint_names): + match = pattern.match(name) + if match: + leg, joint_type = match.groups() + leg, joint_type = leg.lower(), joint_type.lower() + joint_map.setdefault(joint_type, {})[leg] = idx + else: + # any joint name not matching the joint pattern will get its own plot + base_joints[name] = idx + + +def plot_joint_type(joint_type, leg_to_idx): + # 4 rows (LF/RF pos+torque, LH/RH pos+torque) × 2 columns + fig, axs = plt.subplots(4, 2, figsize=(12, 12), constrained_layout=True) + axs = np.array(axs) + + leg_pos = { + "lf": (0, 0), + "rf": (0, 1), + "lh": (2, 0), + "rh": (2, 1), 
+ } + + for leg, (pos_row, col) in leg_pos.items(): + torque_row = pos_row + 1 + if leg not in leg_to_idx: + axs[pos_row, col].axis("off") + axs[torque_row, col].axis("off") + continue + + j = leg_to_idx[leg] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + last_valid = nonzero_idx[-1] + 1 if len(nonzero_idx) > 0 else len(steps) + + # position plot + pos_ax = axs[pos_row, col] + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + linestyle="--", + linewidth=1.3, + label="Target Pos", + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + linewidth=1.3, + label="Actual Pos", + ) + + # add joint limit lines + limits = find_joint_limits(joint_names[j]) + if limits is not None: + lo, hi = limits + pos_ax.axhline( + lo, + color="blue", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Lower Limit ({lo:.2f})", + ) + pos_ax.axhline( + hi, + color="red", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Upper Limit ({hi:.2f})", + ) + + ymin, ymax = pos_ax.get_ylim() + ymin = min(ymin, lo - 0.05) + ymax = max(ymax, hi + 0.05) + pos_ax.set_ylim([ymin, ymax]) + + pos_ax.set_title(f"{leg.upper()} - {joint_type.upper()} Position") + pos_ax.set_ylabel("Position (rad)") + pos_ax.grid(True, linestyle="--", alpha=0.4) + pos_ax.legend(fontsize=7) + + # torque plot + torque_ax = axs[torque_row, col] + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_title(f"{leg.upper()} - {joint_type.upper()} Torque") + torque_ax.set_ylabel("Torque (Nm)") + torque_ax.set_xlabel("Step") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + plt.suptitle(f"{joint_type.upper()} Joints (All Legs)", fontsize=16) + save_path = os.path.join(SAVE_DIR, f"{joint_type}.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {save_path}") + + +# plot function for base +def plot_base_joints(base_joints): + fig, axs = plt.subplots( + len(base_joints) * 2, + 1, + figsize=(10, 4 * len(base_joints)), + constrained_layout=True, + ) + + if len(base_joints) == 1: + axs = np.array([axs[0], axs[1]]) + + for i, (name, j) in enumerate(base_joints.items()): + pos_ax = axs[2 * i] + torque_ax = axs[2 * i + 1] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + last_valid = nonzero_idx[-1] + 1 if len(nonzero_idx) > 0 else len(steps) + + # position + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + linestyle="--", + linewidth=1.3, + label="Target Pos", + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + linewidth=1.3, + label="Actual Pos", + ) + + # add joint limits + limits = find_joint_limits(name) + if limits is not None: + lo, hi = limits + pos_ax.axhline( + lo, + color="blue", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Lower Limit ({lo:.2f})", + ) + pos_ax.axhline( + hi, + color="red", + linestyle="--", + linewidth=1, + alpha=0.7, + label=f"Upper Limit ({hi:.2f})", + ) + + ymin, ymax = pos_ax.get_ylim() + ymin = min(ymin, lo - 0.05) + ymax = max(ymax, hi + 0.05) + pos_ax.set_ylim([ymin, ymax]) + + pos_ax.set_title(f"{name} Position") + pos_ax.set_ylabel("Position (rad)") + pos_ax.legend(fontsize=7) + pos_ax.grid(True, linestyle="--", alpha=0.4) + + # torque + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + 
torque_ax.set_title(f"{name} Torque") + torque_ax.set_ylabel("Torque (Nm)") + torque_ax.set_xlabel("Step") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + plt.suptitle("Base Joints", fontsize=16) + save_path = os.path.join(SAVE_DIR, "base_joints.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {save_path}") + + +# generate plots +for joint_type, leg_to_idx in joint_map.items(): + plot_joint_type(joint_type, leg_to_idx) + +if base_joints: + plot_base_joints(base_joints) diff --git a/scripts/plot_logs_by_leg.py b/scripts/plot_logs_by_leg.py new file mode 100644 index 00000000..a12351cc --- /dev/null +++ b/scripts/plot_logs_by_leg.py @@ -0,0 +1,97 @@ +import numpy as np +import matplotlib.pyplot as plt +import os + +# config +LOG_FILE = "joint_logs.npz" +ENV_ID = 0 +SAVE_DIR = "plots_by_leg" + +# output folder +os.makedirs(SAVE_DIR, exist_ok=True) + +data = np.load(LOG_FILE, allow_pickle=True) +steps = data["step"] +target_pos = data["target_pos"] +actual_pos = data["actual_pos"] +torques = data["torque"] +joint_names = data["joint_name"] + +num_envs, num_steps, num_joints = actual_pos.shape +print(f"Loaded log for {num_joints} joints, {num_steps} steps.") + +# define grouping +groups = { + "base": [0], + "right_hind": list(range(1, 6)), + "left_hind": list(range(6, 11)), + "right_front": list(range(11, 16)), + "left_front": list(range(16, 21)), +} + + +def plot_joint_group(group_name, joint_indices): + num_joints_group = len(joint_indices) + fig, axs = plt.subplots( + nrows=num_joints_group * 2, + ncols=1, + figsize=(10, 3 * num_joints_group * 2), + constrained_layout=True, + ) + + if num_joints_group == 1: + axs = np.array([axs[0], axs[1]]) # ensure consistent 2D array shape + + for idx, j in enumerate(joint_indices): + pos_ax = axs[2 * idx] + torque_ax = axs[2 * idx + 1] + + # find last index with nonzero torque (or position) + nonzero_idx = np.where(np.abs(actual_pos[ENV_ID, :, j]) > 1e-6)[0] + if len(nonzero_idx) > 0: + last_valid = nonzero_idx[-1] + 1 + else: + last_valid = len(steps) + + # position + pos_ax.plot( + steps[:last_valid], + target_pos[ENV_ID, :last_valid, j], + label="Target Pos", + linestyle="--", + linewidth=1.5, + ) + pos_ax.plot( + steps[:last_valid], + actual_pos[ENV_ID, :last_valid, j], + label="Actual Pos", + linewidth=1.5, + ) + pos_ax.set_ylabel(f"{joint_names[j]} Pos (rad)") + pos_ax.legend(loc="upper right", fontsize=8) + pos_ax.grid(True, linestyle="--", alpha=0.4) + + # torque + torque_ax.plot( + steps[:last_valid], + torques[ENV_ID, :last_valid, j], + color="tab:red", + linewidth=1.2, + ) + torque_ax.set_ylabel(f"{joint_names[j]} Torque (Nm)") + torque_ax.grid(True, linestyle="--", alpha=0.4) + + axs[-1].set_xlabel("Step") + plt.suptitle(f"{group_name.capitalize()} Joint Logs", fontsize=14) + save_path = os.path.join(SAVE_DIR, f"{group_name}.png") + plt.savefig(save_path, dpi=200) + plt.close(fig) + print(f"Saved {group_name}.png") + + +# generate plots +for group_name, joint_indices in groups.items(): + if max(joint_indices) < num_joints: + plot_joint_group(group_name, joint_indices) + else: + print(f"Skipping {group_name}: exceeds joint index range.") diff --git a/scripts/sweep_configs/sweep_horse_osc_config.json b/scripts/sweep_configs/sweep_horse_osc_config.json new file mode 100644 index 00000000..df27bc5a --- /dev/null +++ b/scripts/sweep_configs/sweep_horse_osc_config.json @@ -0,0 +1,58 @@ +{ + "method": "random", + "metric": { + "name": "rewards/total_rewards", + "goal": "maximize" + }, + "run_cap": 50, + "parameters": { 
+ "train_cfg.runner.max_iterations": { + "values": [500] + }, + "env_cfg.scaling.dof_pos": { + "values": [ + [0.2, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1, 0.4, 1.3, 1.4, 2.5, 2.1], + [0.24, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52, 0.48, 1.56, 1.68, 3.0, 2.52], + [0.288, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024, 0.576, 1.872, 2.016, 3.6, 3.024], + [0.3456, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288, 0.6912, 2.2464, 2.4192, 4.32, 3.6288], + [0.41472, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536, 0.82944, 2.69568, 2.90304, 5.184, 4.3536], + [0.497664, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472, 0.995328, 3.234816, 3.483648, 6.2208, 5.225472] + ] + }, + + "env_cfg.scaling.dof_pos_target": { + "values": [ + [0.4, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2, 0.8, 2.6, 2.8, 5.0, 4.2], + [0.48, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04, 0.96, 3.12, 3.36, 6.0, 5.04], + [0.576, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048, 1.152, 3.744, 4.032, 7.2, 6.048], + [0.6912, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576, 1.3824, 4.4928, 4.8384, 8.64, 7.2576], + [0.82944, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008, 1.65888, 5.39136, 5.80608, 10.368, 8.71008], + [0.995328, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976, 1.990656, 6.469632, 6.966912, 12.4416, 10.478976] + ] + }, + + "env_cfg.control.stiffness": { + "values": [ + {"haa": 4000, "hfe": 4000, "kfe": 4000, "pfe": 4000, "pastern_to_foot": 4000, "base_joint": 4000}, + {"haa": 3000, "hfe": 3000, "kfe": 3000, "pfe": 3000, "pastern_to_foot": 3000, "base_joint": 3000}, + {"haa": 2000, "hfe": 2000, "kfe": 2000, "pfe": 2000, "pastern_to_foot": 2000, "base_joint": 2000}, + {"haa": 1000, "hfe": 1000, "kfe": 1000, "pfe": 1000, "pastern_to_foot": 1000, "base_joint": 1000}, + {"haa": 500, "hfe": 500, "kfe": 500, "pfe": 500, "pastern_to_foot": 500, "base_joint": 500}, + {"haa": 250, "hfe": 250, "kfe": 250, "pfe": 250, "pastern_to_foot": 250, "base_joint": 250}, + {"haa": 100, "hfe": 100, "kfe": 100, "pfe": 100, "pastern_to_foot": 100, "base_joint": 100}, + {"haa": 50, "hfe": 50, "kfe": 50, "pfe": 50, "pastern_to_foot": 50, "base_joint": 50} + ] + }, + + "env_cfg.control.damping": { + "values": [ + {"haa": 400, "hfe": 400, "kfe": 400, "pfe": 400, "pastern_to_foot": 400, "base_joint": 400}, + {"haa": 300, "hfe": 300, "kfe": 300, "pfe": 300, "pastern_to_foot": 300, "base_joint": 300}, + {"haa": 250, "hfe": 250, "kfe": 250, "pfe": 250, "pastern_to_foot": 250, "base_joint": 250}, + {"haa": 200, "hfe": 200, "kfe": 200, "pfe": 200, "pastern_to_foot": 200, "base_joint": 200}, + {"haa": 100, "hfe": 100, "kfe": 100, "pfe": 100, "pastern_to_foot": 100, "base_joint": 100}, + {"haa": 50, "hfe": 50, "kfe": 50, "pfe": 50, "pastern_to_foot": 50, "base_joint": 50} + ] + } + } +}