From 6b9a2c9324fa9daf56631149e326e5025ff073bc Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Sat, 16 Mar 2024 12:04:58 -0400 Subject: [PATCH 01/26] add coupling to humanoid torques --- gym/envs/mit_humanoid/mit_humanoid.py | 30 +++++++++----------- gym/envs/mit_humanoid/mit_humanoid_config.py | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 74e35d5a..10169afc 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -12,23 +12,19 @@ def _init_buffers(self): super()._init_buffers() def _compute_torques(self): - self.desired_pos_target = self.dof_pos_target + self.default_dof_pos - q = self.dof_pos.clone() - qd = self.dof_vel.clone() - q_des = self.desired_pos_target.clone() - qd_des = self.dof_vel_target.clone() - tau_ff = self.tau_ff.clone() - kp = self.p_gains.clone() - kd = self.d_gains.clone() - - if self.cfg.asset.apply_humanoid_jacobian: - torques = _apply_coupling(q, qd, q_des, qd_des, kp, kd, tau_ff) - else: - torques = kp * (q_des - q) + kd * (qd_des - qd) + tau_ff - - torques = torch.clip(torques, -self.torque_limits, self.torque_limits) - - return torques.view(self.torques.shape) + return torch.clip( + _apply_coupling( + self.dof_pos, + self.dof_vel, + self.dof_pos_target + self.default_dof_pos, + self.dof_vel_target, + self.p_gains, + self.d_gains, + self.tau_ff, + ), + -self.torque_limits, + self.torque_limits, + ) # view shape def _reward_lin_vel_z(self): # Penalize z axis base linear velocity w. squared exp diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index 3439e58c..cdc4334f 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -248,7 +248,7 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): class runner(LeggedRobotRunnerCfg.runner): policy_class_name = "ActorCritic" - algorithm_class_name = "PPO" + algorithm_class_name = "PPO2" num_steps_per_env = 24 max_iterations = 1000 run_name = "Standing" From e52208f5527c99849938726841ebba87a4dd01f6 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Sat, 16 Mar 2024 12:14:47 -0400 Subject: [PATCH 02/26] add exp avg filtering --- gym/envs/mit_humanoid/mit_humanoid.py | 25 ++++++++++---------- gym/envs/mit_humanoid/mit_humanoid_config.py | 2 ++ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 10169afc..67b21868 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -2,6 +2,7 @@ from gym.envs.base.legged_robot import LeggedRobot from .jacobian import _apply_coupling +from gym.utils import exp_avg_filter class MIT_Humanoid(LeggedRobot): @@ -12,19 +13,17 @@ def _init_buffers(self): super()._init_buffers() def _compute_torques(self): - return torch.clip( - _apply_coupling( - self.dof_pos, - self.dof_vel, - self.dof_pos_target + self.default_dof_pos, - self.dof_vel_target, - self.p_gains, - self.d_gains, - self.tau_ff, - ), - -self.torque_limits, - self.torque_limits, - ) # view shape + torques = _apply_coupling( + self.dof_pos, + self.dof_vel, + self.dof_pos_target + self.default_dof_pos, + self.dof_vel_target, + self.p_gains, + self.d_gains, + self.tau_ff, + ) + torques = torques.clip(-self.torque_limits, self.torque_limits) + return exp_avg_filter(torques, self.torques, self.cfg.control.filter_gain) def _reward_lin_vel_z(self): # Penalize z 
axis base linear velocity w. squared exp diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index cdc4334f..d442216d 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -111,6 +111,8 @@ class control(LeggedRobotCfg.control): ctrl_frequency = 100 desired_sim_frequency = 800 + filter_gain = 0.1586 # 1: no filtering, 0: wall + class commands: resampling_time = 10.0 # time before command are changed[s] From 54b826db9475061b01139dc58b6d0e0d930dedb4 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Sat, 16 Mar 2024 19:00:50 -0400 Subject: [PATCH 03/26] add oscillators and phase-based rewards, some tuning --- gym/envs/base/legged_robot.py | 45 +- gym/envs/base/legged_robot_config.py | 2 +- .../mit_humanoid/humanoid_running_config.py | 2 +- gym/envs/mit_humanoid/mit_humanoid.py | 91 ++ gym/envs/mit_humanoid/mit_humanoid_config.py | 157 ++- .../robots/mit_humanoid/friction_model_L.pt | Bin 0 -> 13519 bytes .../robots/mit_humanoid/friction_model_R.pt | Bin 0 -> 13519 bytes .../mit_humanoid/urdf/humanoid_F_sf.urdf | 16 +- .../urdf/humanoid_F_sf_learnt.urdf | 996 ++++++++++++++++++ 9 files changed, 1242 insertions(+), 67 deletions(-) create mode 100644 resources/robots/mit_humanoid/friction_model_L.pt create mode 100644 resources/robots/mit_humanoid/friction_model_R.pt create mode 100644 resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index e02507d5..1bc4623e 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -532,6 +532,29 @@ def _init_buffers(self): self.num_envs, 1, dtype=torch.float, device=self.device ) + # # * get the body_name to body_index dict + # body_dict = self.gym.get_actor_rigid_body_dict( + # self.envs[0], self.actor_handles[0] + # ) + # # * extract a list of body_names where the index is the id number + # body_names = [ + # body_tuple[0] + # for body_tuple in sorted( + # body_dict.items(), key=lambda body_tuple: body_tuple[1] + # ) + # ] + # # * construct a list of id numbers corresponding to end_effectors + # self.end_effector_ids = [] + # for end_effector_name in self.cfg.asset.foot_collisionbox_names: + # self.end_effector_ids.extend( + # [ + # body_names.index(body_name) + # for body_name in body_names + # if end_effector_name in body_name + # ] + # ) + # # ---------------------------------------- + if self.cfg.terrain.measure_heights: self.height_points = self._init_height_points() self.measured_heights = 0 @@ -994,11 +1017,11 @@ def _reward_lin_vel_z(self): def _reward_ang_vel_xy(self): """Penalize xy axes base angular velocity""" - return -torch.sum(torch.square(self.base_ang_vel[:, :2]), dim=1) + return -torch.mean(torch.square(self.base_ang_vel[:, :2]), dim=1) def _reward_orientation(self): """Penalize non flat base orientation""" - return -torch.sum(torch.square(self.projected_gravity[:, :2]), dim=1) + return -torch.mean(torch.square(self.projected_gravity[:, :2]), dim=1) def _reward_base_height(self): """Penalize base height away from target""" @@ -1009,11 +1032,11 @@ def _reward_base_height(self): def _reward_torques(self): """Penalize torques""" - return -torch.sum(torch.square(self.torques), dim=1) + return -torch.mean(torch.square(self.torques), dim=1) def _reward_dof_vel(self): """Penalize dof velocities""" - return -torch.sum(torch.square(self.dof_vel), dim=1) + return -torch.mean(torch.square(self.dof_vel), dim=1) def 
_reward_action_rate(self): """Penalize changes in actions""" @@ -1025,7 +1048,7 @@ def _reward_action_rate(self): ) / dt2 ) - return -torch.sum(error, dim=1) + return -torch.mean(error, dim=1) def _reward_action_rate2(self): """Penalize changes in actions""" @@ -1039,7 +1062,7 @@ def _reward_action_rate2(self): ) / dt2 ) - return -torch.sum(error, dim=1) + return -torch.mean(error, dim=1) def _reward_collision(self): """Penalize collisions on selected bodies""" @@ -1064,26 +1087,26 @@ def _reward_dof_pos_limits(self): # * lower limit out_of_limits = -(self.dof_pos - self.dof_pos_limits[:, 0]).clip(max=0.0) out_of_limits += (self.dof_pos - self.dof_pos_limits[:, 1]).clip(min=0.0) - return -torch.sum(out_of_limits, dim=1) + return -torch.mean(out_of_limits, dim=1) def _reward_dof_vel_limits(self): """Penalize dof velocities too close to the limit""" # * clip to max error = 1 rad/s per joint to avoid huge penalties limit = self.cfg.reward_settings.soft_dof_vel_limit error = self.dof_vel.abs() - self.dof_vel_limits * limit - return -torch.sum(error.clip(min=0.0, max=1.0), dim=1) + return -torch.mean(error.clip(min=0.0, max=1.0), dim=1) def _reward_torque_limits(self): """penalize torques too close to the limit""" limit = self.cfg.reward_settings.soft_torque_limit error = self.torques.abs() - self.torque_limits * limit - return -torch.sum(error.clip(min=0.0, max=1.0), dim=1) + return -torch.mean(error.clip(min=0.0, max=1.0), dim=1) def _reward_tracking_lin_vel(self): """Tracking of linear velocity commands (xy axes)""" error = torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]) error = torch.exp(-error / self.cfg.reward_settings.tracking_sigma) - return torch.sum(error, dim=1) + return torch.mean(error, dim=1) def _reward_tracking_ang_vel(self): """Tracking of angular velocity commands (yaw)""" @@ -1092,7 +1115,7 @@ def _reward_tracking_ang_vel(self): def _reward_feet_contact_forces(self): """penalize high contact forces""" - return -torch.sum( + return -torch.mean( ( torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.reward_settings.max_contact_force diff --git a/gym/envs/base/legged_robot_config.py b/gym/envs/base/legged_robot_config.py index 2955f5a5..221a6afc 100644 --- a/gym/envs/base/legged_robot_config.py +++ b/gym/envs/base/legged_robot_config.py @@ -163,7 +163,7 @@ class asset: file = "" # * name of the feet bodies, # * used to index body state and contact force tensors - foot_name = "None" + foot_name = "foot" penalize_contacts_on = [] terminate_after_contacts_on = [] end_effector_names = [] diff --git a/gym/envs/mit_humanoid/humanoid_running_config.py b/gym/envs/mit_humanoid/humanoid_running_config.py index 86a113f4..fd21c9b4 100644 --- a/gym/envs/mit_humanoid/humanoid_running_config.py +++ b/gym/envs/mit_humanoid/humanoid_running_config.py @@ -142,7 +142,7 @@ class asset(LeggedRobotCfg.asset): # +'humanoid_fixed_arms_full.urdf') file = ( "{LEGGED_GYM_ROOT_DIR}/resources/robots/" - + "mit_humanoid/urdf/humanoid_F_sf.urdf" + + "mit_humanoid/urdf/humanoid_F_sf_learnt.urdf" ) keypoints = ["base"] end_effectors = ["left_foot", "right_foot"] diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 67b21868..17167286 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -11,7 +11,29 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def _init_buffers(self): super()._init_buffers() + self.oscillators = torch.zeros(self.num_envs, 2, device=self.device) + 
self.oscillator_obs = torch.zeros(self.num_envs, 4, device=self.device) + self.oscillator_freq = torch.zeros(self.num_envs, 2, device=self.device) + def _reset_system(self, env_ids): + if len(env_ids) == 0: + return + super()._reset_system(env_ids) + # reset oscillators, with a pi phase shift between left and right + self.oscillators[env_ids, 0] = ( + torch.rand(len(env_ids), device=self.device) * 2 * torch.pi + ) + self.oscillators[env_ids, 1] = self.oscillators[env_ids, 0] + torch.pi + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + # reset oscillator velocities to base freq + self.oscillator_freq[env_ids] = self.cfg.oscillator.base_frequency + # recompute oscillator observations + self.oscillator_obs = torch.cat( + (self.oscillators.cos(), self.oscillators.sin()), dim=1 + ) + return + + # compute_torques accounting for coupling, and filtering torques def _compute_torques(self): torques = _apply_coupling( self.dof_pos, @@ -25,6 +47,44 @@ def _compute_torques(self): torques = torques.clip(-self.torque_limits, self.torque_limits) return exp_avg_filter(torques, self.torques, self.cfg.control.filter_gain) + # oscillator integration + + def _post_decimation_step(self): + super()._post_decimation_step() + self._step_oscillators() + + def _step_oscillators(self, dt=None): + if dt is None: + dt = self.dt + self.oscillators += (self.oscillator_freq * 2 * torch.pi) * dt + self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) + self.oscillator_obs = torch.cat( + (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 + ) + + # --- rewards --- + + def _switch(self, mode=None): + c_vel = torch.linalg.norm(self.commands, dim=1) + switch = torch.exp( + -torch.square( + torch.max( + torch.zeros_like(c_vel), + c_vel - self.cfg.reward_settings.switch_scale, + ) + ) + / self.cfg.reward_settings.switch_scale + ) + if mode is None or mode == "stand": + return switch + elif mode == "move": + return 1 - switch + + def _reward_lin_vel_xy(self): + return torch.exp( + -torch.linalg.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) + ) + def _reward_lin_vel_z(self): # Penalize z axis base linear velocity w. 
squared exp return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"]) @@ -52,6 +112,7 @@ def _reward_tracking_lin_vel(self): def _reward_dof_vel(self): # Penalize dof velocities return torch.mean(self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1) + # * self._switch("stand") def _reward_dof_near_home(self): return torch.mean( @@ -60,3 +121,33 @@ def _reward_dof_near_home(self): ), dim=1, ) + + def _reward_stand_still(self): + """Penalize motion at zero commands""" + # * normalize angles so we care about being within 5 deg + rew_pos = torch.mean( + self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1 + ) + rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) + rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) + rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) + return rew_vel + rew_pos - rew_base_vel # * self._switch("stand") + + def _reward_stance(self): + phase = torch.maximum( + torch.zeros_like(self.oscillators), -self.oscillators.sin() + ) # positive during swing, negative during stance + return (phase * self._compute_grf()).mean(dim=1) + + def _reward_swing(self): + phase = torch.maximum( + torch.zeros_like(self.oscillators), self.oscillators.sin() + ) # positive during swing, negative during stance + return -(phase * self._compute_grf()).mean(dim=1) + + def _compute_grf(self, grf_norm=True): + grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) + if grf_norm: + return torch.clamp_max(grf / self.cfg.asset.total_mass, 1.0) + else: + return grf diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index d442216d..826480e7 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -3,13 +3,15 @@ LeggedRobotRunnerCfg, ) +BASE_HEIGHT_REF = 0.80 + class MITHumanoidCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 4096 num_observations = 49 + 3 * 18 # 121 num_actuators = 18 - episode_length_s = 100 # episode length in seconds + episode_length_s = 5 # episode length in seconds num_privileged_obs = num_observations class terrain(LeggedRobotCfg.terrain): @@ -75,13 +77,13 @@ class init_state(LeggedRobotCfg.init_state): ] # yaw root_vel_range = [ - [-0.1, 0.1], # x - [-0.1, 0.1], # y - [-0.1, 0.1], # z - [-0.1, 0.1], # roll - [-0.1, 0.1], # pitch - [-0.1, 0.1], - ] # yaw + [-0.5, 1.5], # x + [-0.55, 0.55], # y + [-0.35, 0.1], # z + [-0.35, 0.35], # roll + [-0.35, 0.35], # pitch + [-0.35, 0.35], # yaw + ] class control(LeggedRobotCfg.control): # * PD Drive parameters: @@ -97,22 +99,25 @@ class control(LeggedRobotCfg.control): "elbow": 40.0, } # [N*m/rad] damping = { - "hip_yaw": 5.0, - "hip_abad": 5.0, - "hip_pitch": 5.0, - "knee": 5.0, - "ankle": 5.0, - "shoulder_pitch": 5.0, - "shoulder_abad": 5.0, - "shoulder_yaw": 5.0, - "elbow": 5.0, + "hip_yaw": 2.0, + "hip_abad": 2.0, + "hip_pitch": 2.0, + "knee": 2.0, + "ankle": 2.0, + "shoulder_pitch": 2.0, + "shoulder_abad": 2.0, + "shoulder_yaw": 2.0, + "elbow": 1.0, } # [N*m*s/rad] ctrl_frequency = 100 - desired_sim_frequency = 800 + desired_sim_frequency = 1000 filter_gain = 0.1586 # 1: no filtering, 0: wall + class oscillator: + base_frequency = 1.5 # [Hz] + class commands: resampling_time = 10.0 # time before command are changed[s] @@ -138,6 +143,7 @@ class asset(LeggedRobotCfg.asset): "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + "mit_humanoid/urdf/humanoid_R_sf.urdf" ) + # foot_collisionbox_names = ["foot"] foot_name = "foot" 
penalize_contacts_on = ["base", "arm"] terminate_after_contacts_on = ["base"] @@ -148,35 +154,88 @@ class asset(LeggedRobotCfg.asset): # * see GymDofDriveModeFlags # * (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort) default_dof_drive_mode = 3 + fix_base_link = False disable_gravity = False disable_motors = False apply_humanoid_jacobian = False + total_mass = 25.0 class reward_settings(LeggedRobotCfg.reward_settings): soft_dof_pos_limit = 0.9 soft_dof_vel_limit = 0.9 soft_torque_limit = 0.9 max_contact_force = 1500.0 - - base_height_target = 0.65 + base_height_target = BASE_HEIGHT_REF tracking_sigma = 0.5 + # a smooth switch based on |cmd| (commanded velocity). + switch_scale = 0.5 + switch_threshold = 0.2 + class scaling(LeggedRobotCfg.scaling): # * dimensionless time: sqrt(L/g) or sqrt(I/[mgL]), with I=I0+mL^2 - virtual_leg_length = 0.65 - dimensionless_time = (virtual_leg_length / 9.81) ** 0.5 - base_height = virtual_leg_length - base_lin_vel = virtual_leg_length / dimensionless_time - base_ang_vel = 3.14 / dimensionless_time - dof_vel = 20 # ought to be roughly max expected speed. - height_measurements = virtual_leg_length - - # todo check order of joints, create per-joint scaling - dof_pos = 3.14 + # virtual_leg_length = 0.65 + # dimensionless_time = (virtual_leg_length / 9.81) ** 0.5 + # base_height = virtual_leg_length + # base_lin_vel = virtual_leg_length / dimensionless_time + # base_ang_vel = 3.14 / dimensionless_time + # dof_vel = 20 # ought to be roughly max expected speed. + # height_measurements = virtual_leg_length + + # # todo check order of joints, create per-joint scaling + # dof_pos = 3.14 + # dof_pos_obs = dof_pos + # # * Action scales + # dof_pos_target = dof_pos + # tau_ff = 0.1 + base_ang_vel = 2.5 + base_lin_vel = 1.5 + commands = 1 + base_height = BASE_HEIGHT_REF + dof_pos = [ + 0.1, + 0.2, + 0.4, + 0.4, + 0.4, + 0.1, + 0.2, + 0.4, + 0.4, + 0.4, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + 0.5, + ] dof_pos_obs = dof_pos - # * Action scales + # # * Action scales dof_pos_target = dof_pos - tau_ff = 0.1 + dof_vel = [ + 0.5, + 1.0, + 4.0, + 4.0, + 2.0, + 0.5, + 1.0, + 4.0, + 4.0, + 2.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + ] + dof_pos_history = 3 * dof_pos_obs class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): @@ -184,6 +243,7 @@ class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): runner_class_name = "OnPolicyRunner" class policy(LeggedRobotRunnerCfg.policy): + disable_actions = False init_noise_std = 1.0 actor_hidden_dims = [512, 256, 128] critic_hidden_dims = [512, 256, 128] @@ -199,26 +259,27 @@ class policy(LeggedRobotRunnerCfg.policy): "dof_pos_obs", "dof_vel", "dof_pos_history", + "oscillator_obs", ] critic_obs = actor_obs actions = ["dof_pos_target"] class noise: - base_height = 0.05 - dof_pos_obs = 0.0 - dof_vel = 0.0 - base_lin_vel = 0.1 - base_ang_vel = 0.2 - projected_gravity = 0.05 - height_measurements = 0.1 + base_height = 0.0 # 0.05 + dof_pos_obs = 0.0 # 0.0 + dof_vel = 0.0 # 0.0 + base_lin_vel = 0.0 # 0.1 + base_ang_vel = 0.0 # 0.2 + projected_gravity = 0.0 # 0.05 + height_measurements = 0.0 # 0.1 class reward: class weights: - tracking_ang_vel = 0.5 - tracking_lin_vel = 0.5 - orientation = 1.5 - torques = 5.0e-6 + tracking_ang_vel = 1.5 + tracking_lin_vel = 3.0 + orientation = 1.0 + torques = 5.0e-4 min_base_height = 1.5 action_rate = 0.01 action_rate2 = 0.001 @@ -226,8 +287,10 @@ class weights: ang_vel_xy = 0.0 dof_vel = 0.0 stand_still = 0.0 - dof_pos_limits = 0.0 - dof_near_home = 0.5 + dof_pos_limits = 0.25 + dof_near_home 
= 0.1 + stance = 1.0 + swing = 1.0 class termination_weight: termination = 15 @@ -245,13 +308,15 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): schedule = "adaptive" # could be adaptive, fixed gamma = 0.999 lam = 0.95 + discount_horizon = 0.5 # [s] + GAE_bootstrap_horizon = 0.2 # [s] desired_kl = 0.01 max_grad_norm = 1.0 class runner(LeggedRobotRunnerCfg.runner): policy_class_name = "ActorCritic" algorithm_class_name = "PPO2" - num_steps_per_env = 24 + num_steps_per_env = 32 max_iterations = 1000 run_name = "Standing" experiment_name = "Humanoid" diff --git a/resources/robots/mit_humanoid/friction_model_L.pt b/resources/robots/mit_humanoid/friction_model_L.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1d680d5aa88e8f496d957fa7db1640c98ee00e1 GIT binary patch literal 13519 zcmbVz2{={X+qQX56j4GlCQ;_$>}MU4%A86nl1Mah(4KQDOb8#{Mg!LYGF$_(v>Uup)G6kbl&|<&j|# z{wqyoc%lXdK9c>Q$o|NHMGJXizaTPBOTz-9rvKU~!xQ%r?vEMIlkkz~uLD;E1=x<~ zNxIz=j^as8a}jbG323K7x5_G9L00-;<>Jf;ko^z z&VSj$sDOd2=ehrrIUWW+{YfUo^Nit5`{!^g7e++(XFSi#ML5z&q<`1a3B2hsycr?? zY#2!S$gtq3$k0X0C-7#*@VtL*|0mFtH!Ftc^Y4MCyxB3lIll#(@#eRv$VglbVCp@HK0Kel~&A=N+HzR7qe z2c^GS{;%6WfcrZ#rcOYHt$c}MzOLNGTak5LPy{x*xwvfGXD}B{gw(4OFzQnlO`Ck1 z+CG*im-%w^{M0HkUvDrOvGhCHbx_!TW9l@4ynDPLuyi+hwCp0xpJ-#>IPU`u{4rgy zxceljeQHA6lw$3zq`#pzPel+I@RRUcVo9Hh9DTI%6#bZU7=`*DFizX3V z2t`-VcU1k!XrgtI%dKcQMpUoO5Lh033v$QwxCf^^K`};~oDn8mUieT5Y;@#yTJdOr z|73a~?FA{=Jy|fSG#P!5J*A&t$_b>NuO@-bRM1>xDo8aMOfPt7lefNHZeP$@!Gj03 zU=sh7c5PLn))k^eNFrMxHf$?-U$9JIvT?NF5`QE8V&o^-7R-{;O*_D1_hI@aA)cn( zlj0s#tYbnYy~&Ki7I^vmI-@&kE!{k74?Yt)Neo}TBz?P5$-P;p=≷s5+#I3~H?* zy@lnp#73LjDK179Or5}$oT5jrUEg8C0-xY6R$o*|lASErB zF=;RDYC1++Hq2&HChin0PTed>Dd3Ymkp+xT?uC_&)!z0_20 z5%GDMFGzYaX0tfA zTcA^uK?=qTATkZ<+t-ey!z)!l^$N%t(K&S9qFxdk8ckpOjuFI$XOb)3=A`J&A>tGt zM~i>x3oeeVfQk7sWcR{4GO>F*{Z^{)60X4`!<*LX_&GVKIm@usA;>ku8G=1jNSb+XM)_FRLv zu`QA--H7(VL1a?=2zV)|fo<9PEld2z32uDv#Y4K+$>h7@g6t)&f@=320-rp8I{0-h zd8BxQ^i4Kysq!wzO`}_=qwh-+RiMUBy1PkWYA8cDd+(;X%ro-*oUmZ6&>I?T^&0#i zdC`yaW@6`$yVOo~4k*LPCxRlvP2{h4HY>I)({^GIA;EqM{6K_9<5iSMqb2!zMSG9&sXlbo9K zg4eEZXqWN_0poWQ)i`SeuzZ&wE9VC>2x}!(3X0tDAP~6Lj1#0z5(`-6@m>#WN?@H24 zbOm8;>fGu}*ZD5!Dj1VHl3T1ko4%{{g>IR(C{Iq2o`70f(cmZWATgxjLLu21s>=kA zeMB2eWdtu(`pBl!L%0jG5^2e}*@7*#uknMG23K&>kh^gH0_M{o1%aYAQm4&dh)|{v z5sTBM@0LrEK98BiBwK|XmkXrwBe+z(kW1XEx{1&Cy|nYqBD$KY3H0Mr1#!>p1fN!0 z3Z@4b(e*bissFcUgk$mq%g)6M9y?z^(bH+9Y`m_$x$j8siL#AkZN(C5`DrKjXt^tQ zett3c+splsY~IMa`k&w)zkeAV#be3iy^eGZSw-kADf{XJvjif8O$6JQXjADW!|5rp zSM>Z%XEJ|$1$WE$(>VP>2#Ye&(Du}xyVkIW+kNLU#?Qzi{HzIF;fOYF-w6qOBfHD| ztsWKJV>zDy{l}BBMT1);GUUj|F&~KkI3vOzmq%t@RA~9C*Fmnh=unY6GUQ2j9T^cM z+tRx-UvTh9I``EIZ!+Eb9jR&@O#O2C^!d*svhL^?n6YvSzPodd8rh#FZ5&BKp_C#y z7@A6N6%P`epKV2oR?5=@CzObkXC1BidIaBm?j^eU1_C>1BpIU@QQ689WU(rTo92=z zaJnSSo#OwRjd?emyLrDoH~Dotjg2U#9w8cB-69#H3(?#Y2WC+2T3=vR0f4aOY}WVoI+y?51|l}qh|$9tR6 z{;3?X%&@`D&tEh9rG%i@LV=u7t%T^w`xwWokNBT|>VnN>CYzzS6&+;rjs(D7W0lpm_{m{pSzdeAt+KW^^S4oxB1DNkd^ptQcK*tBu`U zdW@uXsxf7!^}wQa4jCc;5*saYn0aTubL`5O)5WhXX~E5G*g0h{mN*!Lq+43l=$%t&rk615slSgsqWzdX zv7?JVbx(m^ym*wjU%>pV+s9lPX$$JptD&UqF(Y-7CAf10xAn+Jl)Sr~o{||04LiCZ zqDD{S^=Um{yu%9qO*9csB&Cn1ylFlfuz>0OcutjGIIklbzjnHS{?$&|HO|saQ zT8v(@M(l{usr*xpDn$Iu2x>909xa3R;lvw0{Q1)LaQwtxkQ_f7_iagp&x`J1>Ru`E zkafVXVqRd>kPg8Ow$K;Wh~oC8*kJVp*E^=MPmQkg-4>5wQa)yZg49D8d14oN=b27j zYNO~Jz71FT(+s9&aS;`5?E&>c_VD>-F1Ms}grH2Tjj8qvBcqO1lctpYnB`JI9fO;R zbmj=bg*i*;lLHBYmrvcvq5VxTeWDVr5Bo;L<}gH6mHQAZxArFRi^oVlEC+mTL7FifrdOPPfv*3#)_vJYQRwGUezIHDm2e#V6@?-np<%ueY z^=NEPn0f#!y~3!Sv>N)ZPk`oAKd8lpR+N8QPmPBP$dMzr=(NXoXi?@hmiPWFHlKB7 
[GIT binary patch payload omitted: resources/robots/mit_humanoid/friction_model_L.pt and friction_model_R.pt, Bin 0 -> 13519 bytes each; base-85 data, not human-readable]
zF?^i5h21quhhA6tjLL6XFr6J>`gNcPVzy+l*HE!^x^lLeJP6vvXfq zW1XlRFeSE#FKzJo`s+CT(sEQ-@)NtJdb0YX8gTw}8xsEN7_-_}p}+sN9@Ylcg0!k4 ze%&yd7Szhncq=9PF8Ug?y_Ey0DL>ItTt z_VS|nb^f_C@H9iKDf_u!R4g0XgOPW5;OJT~sT)~-eV&W@6Xkubl9f#UNdDfjwCN z6GK!#K~jYQ=gGP%{QP+fN`^}V@^(T-?RXk8%9P#Wzr3z%(n+W=3SnMG=D?#ncFeto zP^Kz00-qTSB^zFxQ84XUHCDC_V@7?=WbeGb%eTy}!69OG@M^F-d%M3cf5fpFV3lRUe7-9Mt?R$w znqDdVzF`OCYEtOAWe)+Fw$$b7c9^9ohwqOE^7BgOQziL#U|DHFJH2kRC;V39O4bL8 zR14X+7UnQ^_$PQ3!4QYo&)B-@1dg3^23@{8^V^C8U~_>i$yfLR0g+Si?8gjxL(~yn zH(Js;+UnH1auWE)>JY!W@pOOAQ_k~`D)fGdDZQYa!v?N;jUSKow=HCDaenY8(i2)= zQ1PTD4V=}0gVmF$hKCJGeT(L+oq7by6GyYT*~#?7vGZVBrv_@XIsDU`9x)yFOmOum z2jcbZD>jm)G%jj4X#2%M^BiqHqg_E(j?F|*aV;j~j4X`O4ky-1lC-q$HeM6i2OW<* z>4^&)!S?P*YOpDQIcT;WF8STTq|2scdV~|(qFw+OO_T9{;6oTEWkhukC=dltTRPpY zgmT&qQNB@@UjMomj~N)?^rh9-UtWoVplKz$t*aT+Z;vOnp_1fW-!+)DQHpprTeIh4 zp2Me50j6&aWY+5sC8j-t$X)IOX8j-~a4KqJ3Y@C?{h+hxG37IVLF)!Q;JO3#4~P@J z5JkGaND1Dw?Scut`}ryoR~TXc`uau3G?}im1+Yp5u=(>Gc%`$Zznyv-&K0ghqlkmh ze7c$lt#&5&rp2Qni5J4S6uTZvxf*{gMy8Em@hELYyiX z0!?FVAxC}>BmZL>-sPWX$88@&r?`E=vhLl`Whn`^6 zOtCu-LyJ=3QrJCkd%hJs&0WyQDI8YL>4M7Ix%5U&KC?u2ESNRb^HSaSmQMvq|L zNDV`Y7z1de>3Hc!0ChaL9xRnqK;L^dQ)c8sCM-&U4cn4oI9_9(x36Y{Q}*H!ZX$o3 zK!;=MW=f-JIy5Eh!Q7`iu&TV3U9xi_%D&w~ZuDfc`g|2K%3X$dc4{#v!x~^1lLMFf zH0VJKhPp2`=6o@*M=9S6%=gtz=yT2%PMS*cI|ljFv}9Fy*eOl!`&2QnBRA2;<$8=s zyD%!(lwsG6^>niN0_Yku4)Vt^pqmQiZ5iQnVEDvste!$$AbC{E!{E4aP8^U&vZsM!oxnIlUR5QhO ziIBN3gz?*U6bJN6U3hHtzwkdM4Cp`T`i6fZe3ngjbus5k=W+hkcXv5mfs!l^UCck7wPLJmq& zPIHobrq@qYScOqnsv$RjIrL5($G`t040ydV%-&!{&Ucp^h>x8(j$fa#S{F>1CFR;s z*>CD>5pt zlX*1=hHhg}ATg9g-*HB@TXG=8_h*|lY@mIk4>Q|C7K!IKw*B)+e(jE(*e)@h&zrZF zX-F7KtNnV=wkC+`mz47dy*G!bp1U?3jfbG^nKxd|eh5bSlISv{jPr5x3nrJ%z$*#m z?9^p+zJ?3Dc{!Ao{0WXiu_G|Fc(Td%LdMCojBk+TBM$ZII6(Z*5mOHkNy&Nh9+|Nmz;f=%CwEa~a<+G!4l!Gpw+Sbfm z_<@XVr4OV0qmiSdI}7BF4Pmc}DYKT#-|^pHkOfDrL!8Xna?GLhHqPm^x$G04D@>5r zR7Sx_6KAZFV+Qg!^l2n-tw_Ir*I)hv|ITV4e^ofkP-@KIdBEnix+zJix(Ow?#{vk ztN)%8^81MK0h9I5J{EYky+h$Nz=3`WyRu=E(l>|CaxT{a0?fI}aLU_^+T*qW#zOuSN2&^_+ha nP)JDL?AL*Q+`xPwp(Pf-wg)EuxI2pv%=;0N?EekK|BC%T!Zc6A literal 0 HcmV?d00001 diff --git a/resources/robots/mit_humanoid/urdf/humanoid_F_sf.urdf b/resources/robots/mit_humanoid/urdf/humanoid_F_sf.urdf index 51a84eee..f7f55186 100644 --- a/resources/robots/mit_humanoid/urdf/humanoid_F_sf.urdf +++ b/resources/robots/mit_humanoid/urdf/humanoid_F_sf.urdf @@ -562,7 +562,7 @@ Simple Foot: foot approximated as single box-contact --> + type="fixed"> @@ -607,7 +607,7 @@ Simple Foot: foot approximated as single box-contact --> + type="fixed"> @@ -660,7 +660,7 @@ Simple Foot: foot approximated as single box-contact --> + type="fixed"> @@ -711,7 +711,7 @@ Simple Foot: foot approximated as single box-contact --> + type="fixed"> @@ -790,7 +790,7 @@ Simple Foot: foot approximated as single box-contact --> + type="fixed"> @@ -835,7 +835,7 @@ Simple Foot: foot approximated as single box-contact --> + type="fixed"> @@ -888,7 +888,7 @@ Simple Foot: foot approximated as single box-contact --> + type="fixed"> @@ -939,7 +939,7 @@ Simple Foot: foot approximated as single box-contact --> + type="fixed"> diff --git a/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf b/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf new file mode 100644 index 00000000..5e76f34a --- /dev/null +++ b/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf @@ -0,0 +1,996 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From c9411cdf0c07b55d7095b07233abbf41f5eeb5c0 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Mon, 18 Mar 2024 14:04:09 -0400 Subject: [PATCH 04/26] some tuning --- gym/envs/mit_humanoid/mit_humanoid.py | 17 ++++- gym/envs/mit_humanoid/mit_humanoid_config.py | 72 ++++++++++---------- 2 files changed, 49 insertions(+), 40 deletions(-) diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 17167286..684f4cb3 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -111,8 +111,9 @@ def _reward_tracking_lin_vel(self): def _reward_dof_vel(self): # Penalize dof velocities - return torch.mean(self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1) - # * self._switch("stand") + return torch.mean( + self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1 + ) * self._switch("stand") def _reward_dof_near_home(self): return torch.mean( @@ -131,7 +132,7 @@ def _reward_stand_still(self): rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) - return rew_vel + rew_pos - rew_base_vel # * self._switch("stand") + return rew_vel + rew_pos - rew_base_vel * self._switch("stand") def _reward_stance(self): phase = torch.maximum( @@ -151,3 +152,13 @@ def _compute_grf(self, grf_norm=True): return torch.clamp_max(grf / self.cfg.asset.total_mass, 1.0) else: return grf + + def _reward_walk_freq(self): + # Penalize deviation from base frequency + return torch.mean( + self._sqrdexp( + (self.oscillator_freq - self.cfg.oscillator.base_frequency) + / self.cfg.oscillator.base_frequency + ), + dim=1, + ) * self._switch("move") diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index 826480e7..7a8680e8 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -70,15 +70,15 @@ class init_state(LeggedRobotCfg.init_state): root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [0.7, 0.72], # z + [0.7, 0.8], # z [-0.1, 0.1], # roll [-0.1, 0.1], # pitch [-0.1, 0.1], ] # yaw root_vel_range = [ - [-0.5, 1.5], # x - [-0.55, 0.55], # y + [-0.5, 2.5], # x + [-0.5, 0.5], # y [-0.35, 0.1], # z [-0.35, 0.35], # roll [-0.35, 0.35], # pitch @@ -122,18 +122,18 @@ class commands: resampling_time = 10.0 # time before command are changed[s] class ranges: - lin_vel_x = [-1.0, 1.0] # min max [m/s] + lin_vel_x = [-1.0, 4.0] # min max [m/s] lin_vel_y = 1.0 # max [m/s] yaw_vel = 1 # max [rad/s] class 
push_robots: toggle = False - interval_s = 15 - max_push_vel_xy = 0.05 - push_box_dims = [0.1, 0.2, 0.3] # x,y,z [m] + interval_s = 2 + max_push_vel_xy = 0.6 + push_box_dims = [0.1, 0.1, 0.3] # x,y,z [m] class domain_rand: - randomize_friction = False + randomize_friction = True friction_range = [0.5, 1.25] randomize_base_mass = True added_mass_range = [-1.0, 1.0] @@ -157,7 +157,6 @@ class asset(LeggedRobotCfg.asset): fix_base_link = False disable_gravity = False disable_motors = False - apply_humanoid_jacobian = False total_mass = 25.0 class reward_settings(LeggedRobotCfg.reward_settings): @@ -195,22 +194,22 @@ class scaling(LeggedRobotCfg.scaling): dof_pos = [ 0.1, 0.2, - 0.4, - 0.4, - 0.4, + 0.8, + 0.8, + 0.8, 0.1, 0.2, - 0.4, - 0.4, - 0.4, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, - 0.5, + 0.8, + 0.8, + 0.8, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, ] dof_pos_obs = dof_pos # # * Action scales @@ -248,7 +247,7 @@ class policy(LeggedRobotRunnerCfg.policy): actor_hidden_dims = [512, 256, 128] critic_hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid - activation = "elu" + activation = "tanh" actor_obs = [ "base_height", @@ -266,13 +265,13 @@ class policy(LeggedRobotRunnerCfg.policy): actions = ["dof_pos_target"] class noise: - base_height = 0.0 # 0.05 - dof_pos_obs = 0.0 # 0.0 - dof_vel = 0.0 # 0.0 - base_lin_vel = 0.0 # 0.1 - base_ang_vel = 0.0 # 0.2 - projected_gravity = 0.0 # 0.05 - height_measurements = 0.0 # 0.1 + base_height = 0.05 + dof_pos_obs = 0.0 + dof_vel = 0.0 + base_lin_vel = 0.1 + base_ang_vel = 0.2 + projected_gravity = 0.05 + height_measurements = 0.1 class reward: class weights: @@ -285,12 +284,13 @@ class weights: action_rate2 = 0.001 lin_vel_z = 0.0 ang_vel_xy = 0.0 - dof_vel = 0.0 - stand_still = 0.0 + dof_vel = 0.25 + stand_still = 0.25 dof_pos_limits = 0.25 - dof_near_home = 0.1 + dof_near_home = 0.25 stance = 1.0 swing = 1.0 + walk_freq = 0.5 class termination_weight: termination = 15 @@ -300,7 +300,7 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): value_loss_coef = 1.0 use_clipped_value_loss = True clip_param = 0.2 - entropy_coef = 0.001 + entropy_coef = 0.002 num_learning_epochs = 5 # * mini batch size = num_envs*nsteps / nminibatches num_mini_batches = 4 @@ -308,8 +308,6 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): schedule = "adaptive" # could be adaptive, fixed gamma = 0.999 lam = 0.95 - discount_horizon = 0.5 # [s] - GAE_bootstrap_horizon = 0.2 # [s] desired_kl = 0.01 max_grad_norm = 1.0 From 90ff50cc34ba6a41f442a3c732e3c53dd1c600d5 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Mon, 18 Mar 2024 19:57:46 -0400 Subject: [PATCH 05/26] implement history with (approximated) arbitrary sampling frequency and history length. Implemented for dof_pos_target, dof_pos, dof_vel. 
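A minimal, self-contained sketch of the sampled-history idea described above, assuming a counter-and-threshold scheme that refreshes the buffer once every ctrl_frequency / history_frequency control steps; the names, toy sizes, and the standalone update function below are illustrative stand-ins, not the actual buffers added by this patch.

import torch

num_envs, num_dof = 4, 3
history_length = 3        # number of stored samples (cfg.env.history_length)
ctrl_frequency = 100      # control loop rate [Hz]
history_frequency = 10    # sampling rate of the history [Hz]
threshold = int(ctrl_frequency / history_frequency)  # control steps between samples

dof_pos_history = torch.zeros(num_envs, num_dof * history_length)
counter = torch.zeros(num_envs, dtype=torch.long)

def update_history(dof_pos):
    """Push a new sample for every env whose counter reached the threshold."""
    global counter
    counter += 1
    ids = (counter == threshold).nonzero(as_tuple=False).flatten()
    if ids.numel() > 0:
        # torch.Tensor.roll is out-of-place, so assign the rolled result back
        dof_pos_history[ids] = dof_pos_history[ids].roll(num_dof, dims=1)
        dof_pos_history[ids, :num_dof] = dof_pos[ids]
        counter[ids] = 0

for step in range(3 * threshold):
    update_history(torch.full((num_envs, num_dof), float(step)))
print(dof_pos_history[0])  # newest sample first, oldest last

Rolling the flat (num_dof * history_length) buffer by num_dof and overwriting the first num_dof entries keeps the newest sample at the front, so the whole history can be consumed as a single flat observation vector, which matches how the buffers in this patch are tiled and exposed.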
--- gym/envs/mit_humanoid/mit_humanoid.py | 49 ++++++++++++++++++++ gym/envs/mit_humanoid/mit_humanoid_config.py | 19 ++------ 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 684f4cb3..a7fb56cc 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -15,6 +15,31 @@ def _init_buffers(self): self.oscillator_obs = torch.zeros(self.num_envs, 4, device=self.device) self.oscillator_freq = torch.zeros(self.num_envs, 2, device=self.device) + self.dof_pos_target_history = torch.zeros( + (self.num_envs, self.num_dof, 12), device=self.device + ) + self._init_history_buffers() + + def _init_history_buffers(self): + self.dof_pos_target_history = torch.zeros( + (self.num_envs, self.num_dof * self.cfg.env.history_length), + device=self.device, + ) + self.dof_pos_history = torch.zeros( + self.num_envs, + self.num_dof * self.cfg.env.history_length, + device=self.device, + ) + self.dof_vel_history = torch.zeros( + self.num_envs, + self.num_dof * self.cfg.env.history_length, + device=self.device, + ) + self.history_counter = torch.zeros(self.num_envs, dtype=int, device=self.device) + self.history_threshold = int( + self.cfg.control.ctrl_frequency / self.cfg.env.history_frequency + ) + def _reset_system(self, env_ids): if len(env_ids) == 0: return @@ -31,8 +56,15 @@ def _reset_system(self, env_ids): self.oscillator_obs = torch.cat( (self.oscillators.cos(), self.oscillators.sin()), dim=1 ) + self._reset_history_buffers(env_ids) return + def _reset_history_buffers(self, ids): + n = self.cfg.env.history_length + self.dof_pos_target_history[ids] = self.dof_pos_target[ids].tile(n) + self.dof_pos_history[ids] = self.dof_pos_target[ids].tile(n) + self.dof_vel_history[ids] = self.dof_pos_target[ids].tile(n) + # compute_torques accounting for coupling, and filtering torques def _compute_torques(self): torques = _apply_coupling( @@ -52,6 +84,7 @@ def _compute_torques(self): def _post_decimation_step(self): super()._post_decimation_step() self._step_oscillators() + self._update_history_buffers() def _step_oscillators(self, dt=None): if dt is None: @@ -62,6 +95,22 @@ def _step_oscillators(self, dt=None): (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 ) + def _update_history_buffers(self): + self.history_counter += 1 + + ids = torch.nonzero( + self.history_counter == self.history_threshold, as_tuple=False + ).flatten() + + self.dof_pos_target_history[ids].roll(self.num_dof, dims=1) # check + self.dof_pos_target_history[ids, : self.num_dof] = self.dof_pos_target[ids] + self.dof_pos_history[ids].roll(self.num_dof, dims=1) # check + self.dof_pos_history[ids, : self.num_dof] = self.dof_pos_target[ids] + self.dof_vel_history[ids].roll(self.num_dof, dims=1) # check + self.dof_vel_history[ids, : self.num_dof] = self.dof_pos_target[ids] + + self.history_counter[ids] = 0 + # --- rewards --- def _switch(self, mode=None): diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index 7a8680e8..04433381 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -12,7 +12,9 @@ class env(LeggedRobotCfg.env): num_observations = 49 + 3 * 18 # 121 num_actuators = 18 episode_length_s = 5 # episode length in seconds - num_privileged_obs = num_observations + + history_length = 3 # n samples + history_frequency = 10 # [Hz] class terrain(LeggedRobotCfg.terrain): pass @@ -172,21 +174,6 
@@ class reward_settings(LeggedRobotCfg.reward_settings): switch_threshold = 0.2 class scaling(LeggedRobotCfg.scaling): - # * dimensionless time: sqrt(L/g) or sqrt(I/[mgL]), with I=I0+mL^2 - # virtual_leg_length = 0.65 - # dimensionless_time = (virtual_leg_length / 9.81) ** 0.5 - # base_height = virtual_leg_length - # base_lin_vel = virtual_leg_length / dimensionless_time - # base_ang_vel = 3.14 / dimensionless_time - # dof_vel = 20 # ought to be roughly max expected speed. - # height_measurements = virtual_leg_length - - # # todo check order of joints, create per-joint scaling - # dof_pos = 3.14 - # dof_pos_obs = dof_pos - # # * Action scales - # dof_pos_target = dof_pos - # tau_ff = 0.1 base_ang_vel = 2.5 base_lin_vel = 1.5 commands = 1 From 5020fd023f657116d0ebc5d254bcb98f20c2d215 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Mon, 18 Mar 2024 20:20:22 -0400 Subject: [PATCH 06/26] make pd gains specific for each robot in preparation for domain randomization of gains --- gym/envs/base/legged_robot.py | 33 ++++++-------------- gym/envs/mit_humanoid/mit_humanoid_config.py | 2 +- 2 files changed, 10 insertions(+), 25 deletions(-) diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index 1bc4623e..7fa2f0dd 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -481,41 +481,26 @@ def _init_buffers(self): get_axis_params(-1.0, self.up_axis_idx), device=self.device ).repeat((self.num_envs, 1)) self.torques = torch.zeros( - self.num_envs, - self.num_actuators, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.p_gains = torch.zeros( - self.num_actuators, dtype=torch.float, device=self.device + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.d_gains = torch.zeros( - self.num_actuators, dtype=torch.float, device=self.device + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.dof_pos_target = torch.zeros( - self.num_envs, - self.num_actuators, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.dof_vel_target = torch.zeros( - self.num_envs, - self.num_actuators, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.tau_ff = torch.zeros( - self.num_envs, - self.num_actuators, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.dof_pos_history = torch.zeros( - self.num_envs, - self.num_actuators * 3, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators * 3, dtype=torch.float, device=self.device ) self.commands = torch.zeros( self.num_envs, 3, dtype=torch.float, device=self.device @@ -582,8 +567,8 @@ def _init_buffers(self): found = False for dof_name in self.cfg.control.stiffness.keys(): if dof_name in name: - self.p_gains[i] = self.cfg.control.stiffness[dof_name] - self.d_gains[i] = self.cfg.control.damping[dof_name] + self.p_gains[:, i] = self.cfg.control.stiffness[dof_name] + self.d_gains[:, i] = self.cfg.control.damping[dof_name] found = True if not found: self.p_gains[i] = 0.0 diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index 04433381..1e079807 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -277,7 +277,7 @@ class weights: dof_near_home = 0.25 stance 
= 1.0 swing = 1.0 - walk_freq = 0.5 + walk_freq = 0.0 class termination_weight: termination = 15 From 82968b164fd94cbbfe95efb6d303877ed4ba6b2b Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Tue, 19 Mar 2024 16:58:32 -0400 Subject: [PATCH 07/26] refactor sampled history to be more explicit and not overlap with `dof_pos_history` (used for smoothness reward). Some tuning. Change stance/swing reward to use the smoothed square wave. --- gym/envs/base/legged_robot.py | 12 +-- gym/envs/mit_humanoid/mit_humanoid.py | 95 ++++++++++++-------- gym/envs/mit_humanoid/mit_humanoid_config.py | 89 +++++++++--------- scripts/play.py | 2 +- 4 files changed, 109 insertions(+), 89 deletions(-) diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index 7fa2f0dd..d635fea7 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -110,10 +110,10 @@ def _post_decimation_step(self): self.base_height = self.root_states[:, 2:3] - n = self.num_actuators - self.dof_pos_history[:, 2 * n :] = self.dof_pos_history[:, n : 2 * n] - self.dof_pos_history[:, n : 2 * n] = self.dof_pos_history[:, :n] - self.dof_pos_history[:, :n] = self.dof_pos_target + self.dof_pos_history.roll(self.num_actuators) + self.dof_pos_history[:, : self.num_actuators] = ( + self.dof_pos - self.default_dof_pos + ) self.dof_pos_obs = self.dof_pos - self.default_dof_pos env_ids = ( @@ -133,7 +133,9 @@ def _reset_idx(self, env_ids): self._reset_system(env_ids) self._resample_commands(env_ids) # * reset buffers - self.dof_pos_history[env_ids] = 0.0 + self.dof_pos_history[env_ids] = ( + self.dof_pos[env_ids] - self.default_dof_pos + ).tile(3) self.episode_length_buf[env_ids] = 0 def _initialize_sim(self): diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index a7fb56cc..5db4ab7a 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -14,30 +14,28 @@ def _init_buffers(self): self.oscillators = torch.zeros(self.num_envs, 2, device=self.device) self.oscillator_obs = torch.zeros(self.num_envs, 4, device=self.device) self.oscillator_freq = torch.zeros(self.num_envs, 2, device=self.device) + self._init_sampled_history_buffers() - self.dof_pos_target_history = torch.zeros( - (self.num_envs, self.num_dof, 12), device=self.device - ) - self._init_history_buffers() - - def _init_history_buffers(self): - self.dof_pos_target_history = torch.zeros( - (self.num_envs, self.num_dof * self.cfg.env.history_length), + def _init_sampled_history_buffers(self): + self.sampled_history_dof_pos_target = torch.zeros( + (self.num_envs, self.num_dof * self.cfg.env.sampled_history_length), device=self.device, ) - self.dof_pos_history = torch.zeros( + self.sampled_history_dof_pos = torch.zeros( self.num_envs, - self.num_dof * self.cfg.env.history_length, + self.num_dof * self.cfg.env.sampled_history_length, device=self.device, ) - self.dof_vel_history = torch.zeros( + self.sampled_history_dof_vel = torch.zeros( self.num_envs, - self.num_dof * self.cfg.env.history_length, + self.num_dof * self.cfg.env.sampled_history_length, device=self.device, ) - self.history_counter = torch.zeros(self.num_envs, dtype=int, device=self.device) - self.history_threshold = int( - self.cfg.control.ctrl_frequency / self.cfg.env.history_frequency + self.sampled_history_counter = torch.zeros( + self.num_envs, dtype=int, device=self.device + ) + self.sampled_history_threshold = int( + self.cfg.control.ctrl_frequency / self.cfg.env.sampled_history_frequency ) def _reset_system(self, 
env_ids): @@ -56,14 +54,14 @@ def _reset_system(self, env_ids): self.oscillator_obs = torch.cat( (self.oscillators.cos(), self.oscillators.sin()), dim=1 ) - self._reset_history_buffers(env_ids) + self._reset_sampled_history_buffers(env_ids) return - def _reset_history_buffers(self, ids): - n = self.cfg.env.history_length - self.dof_pos_target_history[ids] = self.dof_pos_target[ids].tile(n) - self.dof_pos_history[ids] = self.dof_pos_target[ids].tile(n) - self.dof_vel_history[ids] = self.dof_pos_target[ids].tile(n) + def _reset_sampled_history_buffers(self, ids): + n = self.cfg.env.sampled_history_length + self.sampled_history_dof_pos_target[ids] = self.dof_pos_target[ids].tile(n) + self.sampled_history_dof_pos[ids] = self.dof_pos_target[ids].tile(n) + self.sampled_history_dof_vel[ids] = self.dof_pos_target[ids].tile(n) # compute_torques accounting for coupling, and filtering torques def _compute_torques(self): @@ -84,7 +82,7 @@ def _compute_torques(self): def _post_decimation_step(self): super()._post_decimation_step() self._step_oscillators() - self._update_history_buffers() + self._update_sampled_history_buffers() def _step_oscillators(self, dt=None): if dt is None: @@ -95,21 +93,24 @@ def _step_oscillators(self, dt=None): (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 ) - def _update_history_buffers(self): - self.history_counter += 1 + def _update_sampled_history_buffers(self): + self.sampled_history_counter += 1 ids = torch.nonzero( - self.history_counter == self.history_threshold, as_tuple=False + self.sampled_history_counter == self.sampled_history_threshold, + as_tuple=False, ).flatten() - self.dof_pos_target_history[ids].roll(self.num_dof, dims=1) # check - self.dof_pos_target_history[ids, : self.num_dof] = self.dof_pos_target[ids] - self.dof_pos_history[ids].roll(self.num_dof, dims=1) # check - self.dof_pos_history[ids, : self.num_dof] = self.dof_pos_target[ids] - self.dof_vel_history[ids].roll(self.num_dof, dims=1) # check - self.dof_vel_history[ids, : self.num_dof] = self.dof_pos_target[ids] + self.sampled_history_dof_pos_target[ids].roll(self.num_dof, dims=1) # check + self.sampled_history_dof_pos_target[ids, : self.num_dof] = self.dof_pos_target[ + ids + ] + self.sampled_history_dof_pos[ids].roll(self.num_dof, dims=1) # check + self.sampled_history_dof_pos[ids, : self.num_dof] = self.dof_pos_target[ids] + self.sampled_history_dof_vel[ids].roll(self.num_dof, dims=1) # check + self.sampled_history_dof_vel[ids, : self.num_dof] = self.dof_pos_target[ids] - self.history_counter[ids] = 0 + self.sampled_history_counter[ids] = 0 # --- rewards --- @@ -167,7 +168,7 @@ def _reward_dof_vel(self): def _reward_dof_near_home(self): return torch.mean( self._sqrdexp( - (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos_obs"] + (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos"] ), dim=1, ) @@ -184,15 +185,17 @@ def _reward_stand_still(self): return rew_vel + rew_pos - rew_base_vel * self._switch("stand") def _reward_stance(self): - phase = torch.maximum( - torch.zeros_like(self.oscillators), -self.oscillators.sin() - ) # positive during swing, negative during stance + # phase = torch.maximum( + # torch.zeros_like(self.oscillators), -self.oscillators.sin() + # ) # positive during swing, negative during stance + phase = self.smooth_sqr_wave(self.oscillators) return (phase * self._compute_grf()).mean(dim=1) def _reward_swing(self): - phase = torch.maximum( - torch.zeros_like(self.oscillators), self.oscillators.sin() - ) # positive during swing, negative 
during stance + # phase = torch.maximum( + # torch.zeros_like(self.oscillators), self.oscillators.sin() + # ) # positive during swing, negative during stance + phase = self.smooth_sqr_wave(self.oscillators + torch.pi) return -(phase * self._compute_grf()).mean(dim=1) def _compute_grf(self, grf_norm=True): @@ -202,6 +205,9 @@ def _compute_grf(self, grf_norm=True): else: return grf + def smooth_sqr_wave(self, phase, sigma=0.2): # sigma=0 is step function + return phase.sin() / (2 * torch.sqrt(phase.sin() ** 2.0 + sigma**2.0)) + 0.5 + def _reward_walk_freq(self): # Penalize deviation from base frequency return torch.mean( @@ -211,3 +217,14 @@ def _reward_walk_freq(self): ), dim=1, ) * self._switch("move") + + def _reward_hips_forward(self): + # reward hip motors for pointing forward + hip_yaw_abad = torch.stack((self.dof_pos[:, 0:2], self.dof_pos[:, 5:7]), dim=1) + hip_yaw_abad -= torch.stack( + (self.default_dof_pos[:, 0:2], self.default_dof_pos[:, 5:7]), dim=1 + ) + hip_yaw_abad /= torch.stack( + (self.scales["dof_pos"][0:2], self.scales["dof_pos"][5:7]), dim=1 + ) + return self._sqrdexp(hip_yaw_abad).sum(dim=1).mean(dim=1) diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index 1e079807..89359a8c 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -9,12 +9,11 @@ class MITHumanoidCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 4096 - num_observations = 49 + 3 * 18 # 121 num_actuators = 18 episode_length_s = 5 # episode length in seconds - history_length = 3 # n samples - history_frequency = 10 # [Hz] + sampled_history_length = 3 # n samples + sampled_history_frequency = 10 # [Hz] class terrain(LeggedRobotCfg.terrain): pass @@ -29,17 +28,17 @@ class init_state(LeggedRobotCfg.init_state): default_joint_angles = { "hip_yaw": 0.0, "hip_abad": 0.0, - "hip_pitch": -0.4, - "knee": 0.9, - "ankle": -0.45, + "hip_pitch": -0.667751, + "knee": 1.4087, + "ankle": -0.708876, "shoulder_pitch": 0.0, "shoulder_abad": 0.0, "shoulder_yaw": 0.0, - "elbow": 0.0, + "elbow": -1.25, } # * default COM for basic initialization - pos = [0.0, 0.0, 0.66] # x,y,z [m] + pos = [0.0, 0.0, 0.72] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] @@ -47,22 +46,22 @@ class init_state(LeggedRobotCfg.init_state): # * initialization for random range setup dof_pos_range = { - "hip_yaw": [0.0, 0.0], - "hip_abad": [0.0, 0.0], - "hip_pitch": [-0.29, -0.25], - "knee": [0.67, 0.71], - "ankle": [-0.43, -0.39], + "hip_yaw": [-0.1, 0.1], + "hip_abad": [-0.1, 0.1], + "hip_pitch": [-0.2, 0.2], + "knee": [-0.2, 0.2], + "ankle": [-0.15, 0.15], "shoulder_pitch": [0.0, 0.0], "shoulder_abad": [0.0, 0.0], "shoulder_yaw": [0.0, 0.0], - "elbow": [0.0, 0.0], + "elbow": [0, 0], } dof_vel_range = { - "hip_yaw": [-0.0, 0.1], - "hip_abad": [-0.0, 0.1], - "hip_pitch": [-0.1, -0.1], - "knee": [-0.05, 0.05], - "ankle": [-0.05, 0.05], + "hip_yaw": [-0.1, 0.1], + "hip_abad": [-0.1, 0.1], + "hip_pitch": [-0.1, 0.1], + "knee": [-0.1, 0.1], + "ankle": [-0.1, 0.1], "shoulder_pitch": [0.0, 0.0], "shoulder_abad": [0.0, 0.0], "shoulder_yaw": [0.0, 0.0], @@ -72,15 +71,15 @@ class init_state(LeggedRobotCfg.init_state): root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [0.7, 0.8], # z + [0.72, 0.72], # z [-0.1, 0.1], # roll [-0.1, 0.1], # pitch [-0.1, 0.1], ] # yaw root_vel_range = [ - [-0.5, 2.5], # x - [-0.5, 0.5], # y + [-0.75, 2.75], # x + [-0.55, 
0.55], # y [-0.35, 0.1], # z [-0.35, 0.35], # roll [-0.35, 0.35], # pitch @@ -98,7 +97,7 @@ class control(LeggedRobotCfg.control): "shoulder_pitch": 40.0, "shoulder_abad": 40.0, "shoulder_yaw": 40.0, - "elbow": 40.0, + "elbow": 50.0, } # [N*m/rad] damping = { "hip_yaw": 2.0, @@ -118,19 +117,19 @@ class control(LeggedRobotCfg.control): filter_gain = 0.1586 # 1: no filtering, 0: wall class oscillator: - base_frequency = 1.5 # [Hz] + base_frequency = 2.0 # [Hz] class commands: resampling_time = 10.0 # time before command are changed[s] class ranges: - lin_vel_x = [-1.0, 4.0] # min max [m/s] - lin_vel_y = 1.0 # max [m/s] - yaw_vel = 1 # max [rad/s] + lin_vel_x = [-1.0, 4.0] # min max [m/s] [-0.75, 0.75] + lin_vel_y = 0.35 # max [m/s] + yaw_vel = 0.5 # max [rad/s] class push_robots: - toggle = False - interval_s = 2 + toggle = True + interval_s = 1 max_push_vel_xy = 0.6 push_box_dims = [0.1, 0.1, 0.3] # x,y,z [m] @@ -147,11 +146,11 @@ class asset(LeggedRobotCfg.asset): ) # foot_collisionbox_names = ["foot"] foot_name = "foot" - penalize_contacts_on = ["base", "arm"] + penalize_contacts_on = ["arm"] terminate_after_contacts_on = ["base"] - end_effector_names = ["hand", "foot"] + end_effector_names = ["hand", "foot"] # ?? flip_visual_attachments = False - self_collisions = 1 # 1 to disagble, 0 to enable...bitwise filter + self_collisions = 0 # 1 to disagble, 0 to enable...bitwise filter collapse_fixed_joints = False # * see GymDofDriveModeFlags # * (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort) @@ -198,7 +197,6 @@ class scaling(LeggedRobotCfg.scaling): 0.1, 0.1, ] - dof_pos_obs = dof_pos # # * Action scales dof_pos_target = dof_pos dof_vel = [ @@ -221,7 +219,7 @@ class scaling(LeggedRobotCfg.scaling): 1.0, 1.0, ] - dof_pos_history = 3 * dof_pos_obs + dof_pos_history = 3 * dof_pos class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): @@ -231,8 +229,8 @@ class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): class policy(LeggedRobotRunnerCfg.policy): disable_actions = False init_noise_std = 1.0 - actor_hidden_dims = [512, 256, 128] - critic_hidden_dims = [512, 256, 128] + actor_hidden_dims = [256, 256, 128] + critic_hidden_dims = [256, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "tanh" @@ -245,6 +243,9 @@ class policy(LeggedRobotRunnerCfg.policy): "dof_pos_obs", "dof_vel", "dof_pos_history", + "sampled_history_dof_pos", + "sampled_history_dof_vel", + "sampled_history_dof_pos_target", "oscillator_obs", ] critic_obs = actor_obs @@ -252,18 +253,17 @@ class policy(LeggedRobotRunnerCfg.policy): actions = ["dof_pos_target"] class noise: - base_height = 0.05 - dof_pos_obs = 0.0 - dof_vel = 0.0 - base_lin_vel = 0.1 - base_ang_vel = 0.2 - projected_gravity = 0.05 - height_measurements = 0.1 + dof_pos = 0.005 + dof_vel = 0.05 + base_ang_vel = 0.025 + base_lin_vel = 0.025 + projected_gravity = 0.01 + feet_contact_state = 0.025 class reward: class weights: - tracking_ang_vel = 1.5 tracking_lin_vel = 3.0 + tracking_ang_vel = 1.5 orientation = 1.0 torques = 5.0e-4 min_base_height = 1.5 @@ -277,6 +277,7 @@ class weights: dof_near_home = 0.25 stance = 1.0 swing = 1.0 + hips_forward = 0.5 walk_freq = 0.0 class termination_weight: diff --git a/scripts/play.py b/scripts/play.py index 29a837ea..eb32855e 100644 --- a/scripts/play.py +++ b/scripts/play.py @@ -18,7 +18,7 @@ def setup(args): env_cfg.env.num_projectiles = 20 task_registry.make_gym_and_sim() env = task_registry.make_env(args.task, env_cfg) - env.cfg.init_state.reset_mode = "reset_to_basic" + # env.cfg.init_state.reset_mode = 
"reset_to_basic" train_cfg.runner.resume = True train_cfg.logging.enable_local_saving = False runner = task_registry.make_alg_runner(env, train_cfg) From 3ae04a080730dd24269d36c222ecc091e936d311 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Sat, 23 Mar 2024 12:29:28 -0400 Subject: [PATCH 08/26] properly roll histories, retune action_rate for mc --- gym/envs/base/legged_robot.py | 2 +- .../mini_cheetah/mini_cheetah_ref_config.py | 4 +-- gym/envs/mit_humanoid/mit_humanoid.py | 12 +++++-- gym/envs/mit_humanoid/mit_humanoid_config.py | 36 +++++++++---------- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index d635fea7..68178650 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -110,7 +110,7 @@ def _post_decimation_step(self): self.base_height = self.root_states[:, 2:3] - self.dof_pos_history.roll(self.num_actuators) + self.dof_pos_history = self.dof_pos_history.roll(self.num_actuators) self.dof_pos_history[:, : self.num_actuators] = ( self.dof_pos - self.default_dof_pos ) diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py index 7911206a..1a4e0aed 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py @@ -113,8 +113,8 @@ class weights: dof_vel = 0.0 min_base_height = 1.5 collision = 0.0 - action_rate = 0.01 - action_rate2 = 0.001 + action_rate = 1.0 + action_rate2 = 1.0 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 5db4ab7a..489b463d 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -101,13 +101,19 @@ def _update_sampled_history_buffers(self): as_tuple=False, ).flatten() - self.sampled_history_dof_pos_target[ids].roll(self.num_dof, dims=1) # check + self.sampled_history_dof_pos_target[ids] = torch.roll( + self.sampled_history_dof_pos_target[ids], self.num_dof, dims=1 + ) # check self.sampled_history_dof_pos_target[ids, : self.num_dof] = self.dof_pos_target[ ids ] - self.sampled_history_dof_pos[ids].roll(self.num_dof, dims=1) # check + self.sampled_history_dof_pos[ids] = torch.roll( + self.sampled_history_dof_pos[ids], self.num_dof, dims=1 + ) # check self.sampled_history_dof_pos[ids, : self.num_dof] = self.dof_pos_target[ids] - self.sampled_history_dof_vel[ids].roll(self.num_dof, dims=1) # check + self.sampled_history_dof_vel[ids] = torch.roll( + self.sampled_history_dof_vel[ids], self.num_dof, dims=1 + ) # check self.sampled_history_dof_vel[ids, : self.num_dof] = self.dof_pos_target[ids] self.sampled_history_counter[ids] = 0 diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index 89359a8c..54813f7b 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -38,7 +38,7 @@ class init_state(LeggedRobotCfg.init_state): } # * default COM for basic initialization - pos = [0.0, 0.0, 0.72] # x,y,z [m] + pos = [0.0, 0.0, 0.6] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] @@ -46,11 +46,11 @@ class init_state(LeggedRobotCfg.init_state): # * initialization for random range setup dof_pos_range = { - "hip_yaw": [-0.1, 0.1], - "hip_abad": [-0.1, 0.1], - "hip_pitch": [-0.2, 0.2], - "knee": [-0.2, 0.2], - "ankle": 
[-0.15, 0.15], + "hip_yaw": [-0.0, 0.0], + "hip_abad": [-0.0, 0.0], + "hip_pitch": [-0.667751, -0.667751], + "knee": [1.4087, 1.4087], + "ankle": [-0.708876, -0.708876], "shoulder_pitch": [0.0, 0.0], "shoulder_abad": [0.0, 0.0], "shoulder_yaw": [0.0, 0.0], @@ -71,7 +71,7 @@ class init_state(LeggedRobotCfg.init_state): root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [0.72, 0.72], # z + [0.64, 0.7], # z [-0.1, 0.1], # roll [-0.1, 0.1], # pitch [-0.1, 0.1], @@ -250,7 +250,7 @@ class policy(LeggedRobotRunnerCfg.policy): ] critic_obs = actor_obs - actions = ["dof_pos_target"] + actions = ["dof_pos_target", "oscillator_freq"] class noise: dof_pos = 0.005 @@ -262,23 +262,23 @@ class noise: class reward: class weights: - tracking_lin_vel = 3.0 - tracking_ang_vel = 1.5 - orientation = 1.0 + # tracking_lin_vel = 3.0 + # tracking_ang_vel = 1.5 + # orientation = 1.0 torques = 5.0e-4 - min_base_height = 1.5 + # min_base_height = 1.5 action_rate = 0.01 action_rate2 = 0.001 lin_vel_z = 0.0 ang_vel_xy = 0.0 - dof_vel = 0.25 - stand_still = 0.25 + # dof_vel = 0.25 + # stand_still = 0.25 dof_pos_limits = 0.25 - dof_near_home = 0.25 + # dof_near_home = 0.25 stance = 1.0 swing = 1.0 hips_forward = 0.5 - walk_freq = 0.0 + walk_freq = 2.5 class termination_weight: termination = 15 @@ -288,11 +288,11 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): value_loss_coef = 1.0 use_clipped_value_loss = True clip_param = 0.2 - entropy_coef = 0.002 + entropy_coef = 0.01 num_learning_epochs = 5 # * mini batch size = num_envs*nsteps / nminibatches num_mini_batches = 4 - learning_rate = 5.0e-4 + learning_rate = 5.0e-5 schedule = "adaptive" # could be adaptive, fixed gamma = 0.999 lam = 0.95 From ee05de48a620bcf598002a4b76779d9d92be3612 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Sun, 24 Mar 2024 18:30:55 -0400 Subject: [PATCH 09/26] enable arms in learnt urdf diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index 54813f7..368f2c3 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -142,7 +142,7 @@ class MITHumanoidCfg(LeggedRobotCfg): class asset(LeggedRobotCfg.asset): file = ( "{LEGGED_GYM_ROOT_DIR}/resources/robots/" - + "mit_humanoid/urdf/humanoid_R_sf.urdf" + + "mit_humanoid/urdf/humanoid_F_sf_learnt.urdf" ) # foot_collisionbox_names = ["foot"] foot_name = "foot" @@ -289,12 +289,12 @@ class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): use_clipped_value_loss = True clip_param = 0.2 entropy_coef = 0.01 - num_learning_epochs = 5 + num_learning_epochs = 4 # * mini batch size = num_envs*nsteps / nminibatches num_mini_batches = 4 - learning_rate = 5.0e-5 + learning_rate = 1.0e-6 schedule = "adaptive" # could be adaptive, fixed - gamma = 0.999 + gamma = 0.99 lam = 0.95 desired_kl = 0.01 max_grad_norm = 1.0 diff --git a/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf b/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf index 5e76f34..98a0773 100644 --- a/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf +++ b/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf @@ -570,7 +570,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -615,7 +615,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -668,7 +668,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -719,7 +719,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -798,7 +798,7 @@ 
Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -843,7 +843,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -896,7 +896,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -947,7 +947,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> --- gym/envs/mit_humanoid/mit_humanoid_config.py | 8 ++++---- .../mit_humanoid/urdf/humanoid_F_sf_learnt.urdf | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index 54813f7b..368f2c3b 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -142,7 +142,7 @@ class domain_rand: class asset(LeggedRobotCfg.asset): file = ( "{LEGGED_GYM_ROOT_DIR}/resources/robots/" - + "mit_humanoid/urdf/humanoid_R_sf.urdf" + + "mit_humanoid/urdf/humanoid_F_sf_learnt.urdf" ) # foot_collisionbox_names = ["foot"] foot_name = "foot" @@ -289,12 +289,12 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): use_clipped_value_loss = True clip_param = 0.2 entropy_coef = 0.01 - num_learning_epochs = 5 + num_learning_epochs = 4 # * mini batch size = num_envs*nsteps / nminibatches num_mini_batches = 4 - learning_rate = 5.0e-5 + learning_rate = 1.0e-6 schedule = "adaptive" # could be adaptive, fixed - gamma = 0.999 + gamma = 0.99 lam = 0.95 desired_kl = 0.01 max_grad_norm = 1.0 diff --git a/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf b/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf index 5e76f34a..98a0773a 100644 --- a/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf +++ b/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf @@ -570,7 +570,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -615,7 +615,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -668,7 +668,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -719,7 +719,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -798,7 +798,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -843,7 +843,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -896,7 +896,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> @@ -947,7 +947,7 @@ Simple Foot: foot approximated as single box-contact --> + type="revolute"> From 0aab1005ea71ad3b39712ad4a5dcbd81845d3c8c Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Sun, 24 Mar 2024 19:15:55 -0400 Subject: [PATCH 10/26] hotfix old runner, and set as default for humanoid. 
New runner is numerically unstable for humanoid --- gym/envs/mit_humanoid/mit_humanoid_config.py | 4 ++-- learning/runners/old_policy_runner.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index 368f2c3b..a3e113b6 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -224,7 +224,7 @@ class scaling(LeggedRobotCfg.scaling): class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): seed = -1 - runner_class_name = "OnPolicyRunner" + runner_class_name = "OldPolicyRunner" class policy(LeggedRobotRunnerCfg.policy): disable_actions = False @@ -301,7 +301,7 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): class runner(LeggedRobotRunnerCfg.runner): policy_class_name = "ActorCritic" - algorithm_class_name = "PPO2" + algorithm_class_name = "PPO" num_steps_per_env = 32 max_iterations = 1000 run_name = "Standing" diff --git a/learning/runners/old_policy_runner.py b/learning/runners/old_policy_runner.py index 9a738b93..f42b26cf 100644 --- a/learning/runners/old_policy_runner.py +++ b/learning/runners/old_policy_runner.py @@ -70,7 +70,7 @@ def learn(self): logger.tic("learning") self.alg.update() logger.toc("learning") - logger.log_category() + logger.log_all_categories() logger.finish_iteration() logger.toc("iteration") From d86162e57f3d2c64ed28f1a4a172bc3b7a90efbd Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Fri, 29 Mar 2024 13:04:45 -0400 Subject: [PATCH 11/26] WIP: tuning, critic loss higher than expected --- gym/envs/mit_humanoid/mit_humanoid.py | 12 ++--- gym/envs/mit_humanoid/mit_humanoid_config.py | 48 ++++++++++---------- learning/algorithms/ppo2.py | 13 ++---- learning/runners/on_policy_runner.py | 4 ++ learning/utils/dict_utils.py | 23 ++++------ 5 files changed, 48 insertions(+), 52 deletions(-) diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 489b463d..118ac170 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -172,12 +172,9 @@ def _reward_dof_vel(self): ) * self._switch("stand") def _reward_dof_near_home(self): - return torch.mean( - self._sqrdexp( - (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos"] - ), - dim=1, - ) + return self._sqrdexp( + (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos"] + ).mean(dim=1) def _reward_stand_still(self): """Penalize motion at zero commands""" @@ -233,4 +230,5 @@ def _reward_hips_forward(self): hip_yaw_abad /= torch.stack( (self.scales["dof_pos"][0:2], self.scales["dof_pos"][5:7]), dim=1 ) - return self._sqrdexp(hip_yaw_abad).sum(dim=1).mean(dim=1) + return (hip_yaw_abad).pow(2).mean(dim=1) + # return self._sqrdexp(hip_yaw_abad).sum(dim=1).mean(dim=1) diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index a3e113b6..033409e0 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -117,20 +117,20 @@ class control(LeggedRobotCfg.control): filter_gain = 0.1586 # 1: no filtering, 0: wall class oscillator: - base_frequency = 2.0 # [Hz] + base_frequency = 3.0 # [Hz] class commands: resampling_time = 10.0 # time before command are changed[s] class ranges: - lin_vel_x = [-1.0, 4.0] # min max [m/s] [-0.75, 0.75] - lin_vel_y = 0.35 # max [m/s] - yaw_vel = 0.5 # max [rad/s] + lin_vel_x = [-0.0, 0.0] # min max [m/s] [-0.75, 0.75] + lin_vel_y = 0.0 # max [m/s] + yaw_vel = 
0.0 # max [rad/s] class push_robots: toggle = True interval_s = 1 - max_push_vel_xy = 0.6 + max_push_vel_xy = 0.5 push_box_dims = [0.1, 0.1, 0.3] # x,y,z [m] class domain_rand: @@ -155,18 +155,18 @@ class asset(LeggedRobotCfg.asset): # * see GymDofDriveModeFlags # * (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort) default_dof_drive_mode = 3 - fix_base_link = False + fix_base_link = True disable_gravity = False disable_motors = False total_mass = 25.0 class reward_settings(LeggedRobotCfg.reward_settings): - soft_dof_pos_limit = 0.9 - soft_dof_vel_limit = 0.9 - soft_torque_limit = 0.9 + soft_dof_pos_limit = 0.8 + soft_dof_vel_limit = 0.8 + soft_torque_limit = 0.8 max_contact_force = 1500.0 base_height_target = BASE_HEIGHT_REF - tracking_sigma = 0.5 + tracking_sigma = 0.25 # a smooth switch based on |cmd| (commanded velocity). switch_scale = 0.5 @@ -229,28 +229,28 @@ class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): class policy(LeggedRobotRunnerCfg.policy): disable_actions = False init_noise_std = 1.0 - actor_hidden_dims = [256, 256, 128] - critic_hidden_dims = [256, 256, 128] + actor_hidden_dims = [512, 512, 128] + critic_hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid - activation = "tanh" + activation = "elu" actor_obs = [ "base_height", "base_lin_vel", "base_ang_vel", "projected_gravity", - "commands", + # "commands", "dof_pos_obs", "dof_vel", "dof_pos_history", - "sampled_history_dof_pos", - "sampled_history_dof_vel", - "sampled_history_dof_pos_target", + # "sampled_history_dof_pos", + # "sampled_history_dof_vel", + # "sampled_history_dof_pos_target", "oscillator_obs", ] critic_obs = actor_obs - actions = ["dof_pos_target", "oscillator_freq"] + actions = ["dof_pos_target"] class noise: dof_pos = 0.005 @@ -262,23 +262,23 @@ class noise: class reward: class weights: - # tracking_lin_vel = 3.0 + # tracking_lin_vel = 0.0 # tracking_ang_vel = 1.5 # orientation = 1.0 torques = 5.0e-4 # min_base_height = 1.5 - action_rate = 0.01 - action_rate2 = 0.001 + action_rate = 1e-3 + action_rate2 = 1e-3 lin_vel_z = 0.0 ang_vel_xy = 0.0 # dof_vel = 0.25 # stand_still = 0.25 dof_pos_limits = 0.25 - # dof_near_home = 0.25 + dof_near_home = 0.25 stance = 1.0 swing = 1.0 - hips_forward = 0.5 - walk_freq = 2.5 + hips_forward = 0.0 + walk_freq = 0.0 # 2.5 class termination_weight: termination = 15 diff --git a/learning/algorithms/ppo2.py b/learning/algorithms/ppo2.py index 69d6dcd2..efdce668 100644 --- a/learning/algorithms/ppo2.py +++ b/learning/algorithms/ppo2.py @@ -65,15 +65,12 @@ def train_mode(self): def act(self, obs, critic_obs): return self.actor.act(obs).detach() - def update(self, data, last_obs=None): - if last_obs is None: - last_values = None - else: - with torch.no_grad(): - last_values = self.critic.evaluate(last_obs).detach() - compute_generalized_advantages( - data, self.gamma, self.lam, self.critic, last_values + def update(self, data): + data["values"] = self.critic.evaluate(data["critic_obs"]) + data["advantages"] = compute_generalized_advantages( + data, self.gamma, self.lam, self.critic ) + data["returns"] = data["advantages"] + data["values"] self.update_critic(data) self.update_actor(data) diff --git a/learning/runners/on_policy_runner.py b/learning/runners/on_policy_runner.py index 91e2ac26..6350eaa5 100644 --- a/learning/runners/on_policy_runner.py +++ b/learning/runners/on_policy_runner.py @@ -38,8 +38,10 @@ def learn(self): transition.update( { "actor_obs": actor_obs, + "next_actor_obs": actor_obs, "actions": self.alg.act(actor_obs, 
critic_obs), "critic_obs": critic_obs, + "next_critic_obs": critic_obs, "rewards": self.get_rewards({"termination": 0.0})["termination"], "dones": self.get_timed_out(), } @@ -89,6 +91,8 @@ def learn(self): transition.update( { + "next_actor_obs": actor_obs, + "next_critic_obs": critic_obs, "rewards": total_rewards, "timed_out": timed_out, "dones": dones, diff --git a/learning/utils/dict_utils.py b/learning/utils/dict_utils.py index 2d19e072..4f4ff420 100644 --- a/learning/utils/dict_utils.py +++ b/learning/utils/dict_utils.py @@ -23,16 +23,14 @@ def compute_MC_returns(data: TensorDict, gamma, critic=None): @torch.no_grad -def compute_generalized_advantages(data, gamma, lam, critic, last_values=None): - data.update({"values": critic.evaluate(data["critic_obs"])}) - - data.update({"advantages": torch.zeros_like(data["values"])}) - +def compute_generalized_advantages(data, gamma, lam, critic): + last_values = critic.evaluate(data["next_critic_obs"][-1]) + advantages = torch.zeros_like(data["values"]) if last_values is not None: # todo check this # since we don't have observations for the last step, need last value plugged in not_done = ~data["dones"][-1] - data["advantages"][-1] = ( + advantages[-1] = ( data["rewards"][-1] + gamma * data["values"][-1] * data["timed_out"][-1] + gamma * last_values * not_done @@ -47,15 +45,14 @@ def compute_generalized_advantages(data, gamma, lam, critic, last_values=None): + gamma * data["values"][k + 1] * not_done - data["values"][k] ) - data["advantages"][k] = ( - td_error + gamma * lam * not_done * data["advantages"][k + 1] - ) + advantages[k] = td_error + gamma * lam * not_done * advantages[k + 1] - data["returns"] = data["advantages"] + data["values"] + return normalize(advantages) - data["advantages"] = (data["advantages"] - data["advantages"].mean()) / ( - data["advantages"].std() + 1e-8 - ) + +@torch.no_grad +def normalize(input, eps=1e-8): + return (input - input.mean()) / (input.std() + eps) # todo change num_epochs to num_batches From 098989a175231ca70a814230c1adf9ca5aabbb9b Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Tue, 6 Aug 2024 13:19:37 -0400 Subject: [PATCH 12/26] fix hips_forward dimensionality --- gym/envs/mit_humanoid/mit_humanoid.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 118ac170..265f0ff9 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -223,12 +223,12 @@ def _reward_walk_freq(self): def _reward_hips_forward(self): # reward hip motors for pointing forward - hip_yaw_abad = torch.stack((self.dof_pos[:, 0:2], self.dof_pos[:, 5:7]), dim=1) - hip_yaw_abad -= torch.stack( + hip_yaw_abad = torch.cat((self.dof_pos[:, 0:2], self.dof_pos[:, 5:7]), dim=1) + hip_yaw_abad -= torch.cat( (self.default_dof_pos[:, 0:2], self.default_dof_pos[:, 5:7]), dim=1 ) - hip_yaw_abad /= torch.stack( - (self.scales["dof_pos"][0:2], self.scales["dof_pos"][5:7]), dim=1 + hip_yaw_abad /= torch.cat( + (self.scales["dof_pos"][0:2], self.scales["dof_pos"][5:7]) ) return (hip_yaw_abad).pow(2).mean(dim=1) # return self._sqrdexp(hip_yaw_abad).sum(dim=1).mean(dim=1) From 0611bcd52ba326d8a23b929a679382a72f14b0f9 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Tue, 6 Aug 2024 13:43:10 -0400 Subject: [PATCH 13/26] lander env --- gym/envs/__init__.py | 6 +- gym/envs/mit_humanoid/lander.py | 195 +++++++++++++++ gym/envs/mit_humanoid/lander_config.py | 331 +++++++++++++++++++++++++ 3 files changed, 531 
insertions(+), 1 deletion(-) create mode 100644 gym/envs/mit_humanoid/lander.py create mode 100644 gym/envs/mit_humanoid/lander_config.py diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index f9d8b788..26710fce 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -19,7 +19,8 @@ "Anymal": ".anymal_c.anymal", "A1": ".a1.a1", "HumanoidRunning": ".mit_humanoid.humanoid_running", - "Pendulum": ".pendulum.pendulum" + "Pendulum": ".pendulum.pendulum", + "Lander": ".mit_humanoid.lander", } config_dict = { @@ -34,6 +35,7 @@ "HumanoidRunningCfg": ".mit_humanoid.humanoid_running_config", "PendulumCfg": ".pendulum.pendulum_config", "PendulumSACCfg": ".pendulum.pendulum_SAC_config", + "LanderCfg": ".mit_humanoid.lander_config", } runner_config_dict = { @@ -48,6 +50,7 @@ "HumanoidRunningRunnerCfg": ".mit_humanoid.humanoid_running_config", "PendulumRunnerCfg": ".pendulum.pendulum_config", "PendulumSACRunnerCfg": ".pendulum.pendulum_SAC_config", + "LanderRunnerCfg": ".mit_humanoid.lander_config", } task_dict = { @@ -77,6 +80,7 @@ "flat_anymal_c": ["Anymal", "AnymalCFlatCfg", "AnymalCFlatRunnerCfg"], "pendulum": ["Pendulum", "PendulumCfg", "PendulumRunnerCfg"], "sac_pendulum": ["Pendulum", "PendulumSACCfg", "PendulumSACRunnerCfg"], + "lander": ["Lander", "LanderCfg", "LanderRunnerCfg"], } for class_name, class_location in class_dict.items(): diff --git a/gym/envs/mit_humanoid/lander.py b/gym/envs/mit_humanoid/lander.py new file mode 100644 index 00000000..5856efb8 --- /dev/null +++ b/gym/envs/mit_humanoid/lander.py @@ -0,0 +1,195 @@ +import torch + +from gym.envs.base.legged_robot import LeggedRobot + +# from gym.envs.mit_humanoid.mit_humanoid import MIT_Humanoid +from .jacobian import _apply_coupling + + +class Lander(LeggedRobot): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _init_buffers(self): + super()._init_buffers() + self._init_sampled_history_buffers() + + def _init_sampled_history_buffers(self): + self.sampled_history_dof_pos_target = torch.zeros( + (self.num_envs, self.num_dof * self.cfg.env.sampled_history_length), + device=self.device, + ) + self.sampled_history_dof_pos = torch.zeros( + self.num_envs, + self.num_dof * self.cfg.env.sampled_history_length, + device=self.device, + ) + self.sampled_history_dof_vel = torch.zeros( + self.num_envs, + self.num_dof * self.cfg.env.sampled_history_length, + device=self.device, + ) + self.sampled_history_counter = torch.zeros( + self.num_envs, dtype=int, device=self.device + ) + self.sampled_history_threshold = int( + self.cfg.control.ctrl_frequency / self.cfg.env.sampled_history_frequency + ) + + def _reset_system(self, env_ids): + if len(env_ids) == 0: + return + super()._reset_system(env_ids) + + self._reset_sampled_history_buffers(env_ids) + return + + def _reset_sampled_history_buffers(self, ids): + n = self.cfg.env.sampled_history_length + self.sampled_history_dof_pos_target[ids] = self.dof_pos_target[ids].tile(n) + self.sampled_history_dof_pos[ids] = self.dof_pos_target[ids].tile(n) + self.sampled_history_dof_vel[ids] = self.dof_pos_target[ids].tile(n) + + # compute_torques accounting for coupling, and filtering torques + def _compute_torques(self): + torques = _apply_coupling( + self.dof_pos, + self.dof_vel, + self.dof_pos_target + self.default_dof_pos, + self.dof_vel_target, + self.p_gains, + self.d_gains, + self.tau_ff, + ) + torques = torques.clip(-self.torque_limits, self.torque_limits) + return torques + + def 
_post_decimation_step(self): + super()._post_decimation_step() + self._update_sampled_history_buffers() + + def _update_sampled_history_buffers(self): + self.sampled_history_counter += 1 + + ids = torch.nonzero( + self.sampled_history_counter == self.sampled_history_threshold, + as_tuple=False, + ).flatten() + + self.sampled_history_dof_pos_target[ids] = torch.roll( + self.sampled_history_dof_pos_target[ids], self.num_dof, dims=1 + ) # check + self.sampled_history_dof_pos_target[ids, : self.num_dof] = self.dof_pos_target[ + ids + ] + self.sampled_history_dof_pos[ids] = torch.roll( + self.sampled_history_dof_pos[ids], self.num_dof, dims=1 + ) # check + self.sampled_history_dof_pos[ids, : self.num_dof] = self.dof_pos_target[ids] + self.sampled_history_dof_vel[ids] = torch.roll( + self.sampled_history_dof_vel[ids], self.num_dof, dims=1 + ) # check + self.sampled_history_dof_vel[ids, : self.num_dof] = self.dof_pos_target[ids] + + self.sampled_history_counter[ids] = 0 + + # --- rewards --- + + def _switch(self, mode=None): + c_vel = torch.linalg.norm(self.commands, dim=1) + switch = torch.exp( + -torch.square( + torch.max( + torch.zeros_like(c_vel), + c_vel - self.cfg.reward_settings.switch_scale, + ) + ) + / self.cfg.reward_settings.switch_scale + ) + if mode is None or mode == "stand": + return switch + elif mode == "move": + return 1 - switch + + def _reward_lin_vel_xy(self): + return torch.exp( + -torch.linalg.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) + ) + + def _reward_lin_vel_z(self): + # Penalize z axis base linear velocity w. squared exp + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"]) + + def _reward_orientation(self): + # Penalize non flat base orientation + return torch.sum( + self._sqrdexp(torch.square(self.projected_gravity[:, :2])), dim=1 + ) + + def _reward_min_base_height(self): + """Squared exponential saturating at base_height target""" + error = self.base_height - self.cfg.reward_settings.base_height_target + error = torch.clamp(error, max=0, min=None).flatten() + return self._sqrdexp(error) + + def _reward_tracking_lin_vel(self): + """Tracking of linear velocity commands (xy axes)""" + # just use lin_vel? + error = self.commands[:, :2] - self.base_lin_vel[:, :2] + # * scale by (1+|cmd|): if cmd=0, no scaling. 
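        # * concretely, the factor below is 1/(1+|cmd|): at |cmd| = 2 m/s the error is
        # * scaled by 1/3, so a 0.3 m/s tracking error is penalized like a 0.1 m/s
        # * error at standstill.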
+ error *= 1.0 / (1.0 + torch.abs(self.commands[:, :2])) + return torch.mean(self._sqrdexp(error), dim=1) + + def _reward_dof_vel(self): + # Penalize dof velocities + return torch.mean( + self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1 + ) * self._switch("stand") + + def _reward_dof_near_home(self): + return self._sqrdexp( + (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos"] + ).mean(dim=1) + + def _reward_stand_still(self): + """Penalize motion at zero commands""" + # * normalize angles so we care about being within 5 deg + rew_pos = torch.mean( + self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1 + ) + rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) + rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) + rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) + return rew_vel + rew_pos - rew_base_vel * self._switch("stand") + + def _compute_grf(self, grf_norm=True): + grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) + if grf_norm: + return torch.clamp_max(grf / self.cfg.asset.total_mass, 1.0) + else: + return grf + + def smooth_sqr_wave(self, phase, sigma=0.2): # sigma=0 is step function + return phase.sin() / (2 * torch.sqrt(phase.sin() ** 2.0 + sigma**2.0)) + 0.5 + + def _reward_walk_freq(self): + # Penalize deviation from base frequency + return torch.mean( + self._sqrdexp( + (self.oscillator_freq - self.cfg.oscillator.base_frequency) + / self.cfg.oscillator.base_frequency + ), + dim=1, + ) * self._switch("move") + + def _reward_hips_forward(self): + # reward hip motors for pointing forward + hip_yaw_abad = torch.stack((self.dof_pos[:, 0:2], self.dof_pos[:, 5:7]), dim=1) + hip_yaw_abad -= torch.stack( + (self.default_dof_pos[:, 0:2], self.default_dof_pos[:, 5:7]), dim=1 + ) + hip_yaw_abad /= torch.stack( + (self.scales["dof_pos"][0:2], self.scales["dof_pos"][5:7]), dim=1 + ) + return (hip_yaw_abad).pow(2).mean(dim=1) + # return self._sqrdexp(hip_yaw_abad).sum(dim=1).mean(dim=1) diff --git a/gym/envs/mit_humanoid/lander_config.py b/gym/envs/mit_humanoid/lander_config.py new file mode 100644 index 00000000..a4262f0d --- /dev/null +++ b/gym/envs/mit_humanoid/lander_config.py @@ -0,0 +1,331 @@ +from gym.envs.base.legged_robot_config import ( + LeggedRobotCfg, + LeggedRobotRunnerCfg, +) + +BASE_HEIGHT_REF = 0.80 + + +class LanderCfg(LeggedRobotCfg): + class env(LeggedRobotCfg.env): + num_envs = 4096 + num_actuators = 18 + episode_length_s = 5 # episode length in seconds + + sampled_history_length = 3 # n samples + sampled_history_frequency = 10 # [Hz] + + class terrain(LeggedRobotCfg.terrain): + pass + + class init_state(LeggedRobotCfg.init_state): + # * default setup chooses how the initial conditions are chosen. + # * "reset_to_basic" = a single position with added randomized noise. + # * "reset_to_range" = a range of joint positions and velocities. + # * "reset_to_traj" = feed in a trajectory to sample from. 
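        # * (minimal sketch of the "reset_to_range" behavior, assuming uniform
        # * sampling per joint; the exact base-class implementation may differ)
        # *     low, high = dof_pos_range[joint_name]
        # *     dof_pos[env_ids, j] = low + (high - low) * torch.rand(len(env_ids))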
+ reset_mode = "reset_to_range" + + default_joint_angles = { + "hip_yaw": 0.0, + "hip_abad": 0.0, + "hip_pitch": -0.667751, + "knee": 1.4087, + "ankle": -0.708876, + "shoulder_pitch": 0.0, + "shoulder_abad": 0.0, + "shoulder_yaw": 0.0, + "elbow": -1.25, + } + + # * default COM for basic initialization + pos = [0.0, 0.0, 0.6] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + + # * initialization for random range setup + + dof_pos_range = { + "hip_yaw": [-0.0, 0.0], + "hip_abad": [-0.0, 0.0], + "hip_pitch": [-0.667751, -0.667751], + "knee": [1.4087, 1.4087], + "ankle": [-0.708876, -0.708876], + "shoulder_pitch": [0.0, 0.0], + "shoulder_abad": [0.0, 0.0], + "shoulder_yaw": [0.0, 0.0], + "elbow": [0, 0], + } + dof_vel_range = { + "hip_yaw": [-0.1, 0.1], + "hip_abad": [-0.1, 0.1], + "hip_pitch": [-0.1, 0.1], + "knee": [-0.1, 0.1], + "ankle": [-0.1, 0.1], + "shoulder_pitch": [0.0, 0.0], + "shoulder_abad": [0.0, 0.0], + "shoulder_yaw": [0.0, 0.0], + "elbow": [0.0, 0.0], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [1.0, 1.5], # z + [-0.1, 0.1], # roll + [-0.1, 0.1], # pitch + [-0.1, 0.1], + ] # yaw + + root_vel_range = [ + [-0.75, 2.75], # x + [-0.55, 0.55], # y + [-2.5, 0.25], # z + [-0.35, 0.35], # roll + [-0.35, 0.35], # pitch + [-0.35, 0.35], # yaw + ] + + class control(LeggedRobotCfg.control): + # * PD Drive parameters: + stiffness = { + "hip_yaw": 30.0, + "hip_abad": 30.0, + "hip_pitch": 30.0, + "knee": 30.0, + "ankle": 30.0, + "shoulder_pitch": 40.0, + "shoulder_abad": 40.0, + "shoulder_yaw": 40.0, + "elbow": 50.0, + } # [N*m/rad] + damping = { + "hip_yaw": 2.0, + "hip_abad": 2.0, + "hip_pitch": 2.0, + "knee": 2.0, + "ankle": 2.0, + "shoulder_pitch": 2.0, + "shoulder_abad": 2.0, + "shoulder_yaw": 2.0, + "elbow": 1.0, + } # [N*m*s/rad] + + ctrl_frequency = 100 + desired_sim_frequency = 1000 + + # class oscillator: + # base_frequency = 3.0 # [Hz] + + class commands: + resampling_time = 10.0 # time before command are changed[s] + + class ranges: + lin_vel_x = [-0.0, 0.0] # min max [m/s] [-0.75, 0.75] + lin_vel_y = 0.0 # max [m/s] + yaw_vel = 0.0 # max [rad/s] + + class push_robots: + toggle = True + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.1, 0.1, 0.3] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.5, 1.25] + randomize_base_mass = True + added_mass_range = [-1.0, 1.0] + + class asset(LeggedRobotCfg.asset): + file = ( + "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + + "mit_humanoid/urdf/humanoid_F_sf_learnt.urdf" + ) + # foot_collisionbox_names = ["foot"] + foot_name = "foot" + penalize_contacts_on = ["arm"] + terminate_after_contacts_on = ["base"] + end_effector_names = ["hand", "foot"] # ?? + flip_visual_attachments = False + self_collisions = 0 # 1 to disagble, 0 to enable...bitwise filter + collapse_fixed_joints = False + # * see GymDofDriveModeFlags + # * (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort) + default_dof_drive_mode = 3 + fix_base_link = False + disable_gravity = False + disable_motors = False + total_mass = 25.0 + + class reward_settings(LeggedRobotCfg.reward_settings): + soft_dof_pos_limit = 0.8 + soft_dof_vel_limit = 0.8 + soft_torque_limit = 0.8 + max_contact_force = 1500.0 + base_height_target = BASE_HEIGHT_REF + tracking_sigma = 0.25 + + # a smooth switch based on |cmd| (commanded velocity). 
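        # As implemented in Lander._switch() (lander.py, same patch):
        #     switch = exp(-max(0, ||cmd|| - switch_scale)**2 / switch_scale)
        # so "stand"-gated rewards get full weight whenever ||cmd|| <= switch_scale,
        # and the "move" gate (1 - switch) takes over smoothly for larger commands.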
+ switch_scale = 0.5 + switch_threshold = 0.2 + + class scaling(LeggedRobotCfg.scaling): + base_ang_vel = 2.5 + base_lin_vel = 1.5 + commands = 1 + base_height = BASE_HEIGHT_REF + dof_pos = [ + 0.1, + 0.2, + 0.8, + 0.8, + 0.8, + 0.1, + 0.2, + 0.8, + 0.8, + 0.8, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + ] + # # * Action scales + dof_pos_target = dof_pos + dof_vel = [ + 0.5, + 1.0, + 4.0, + 4.0, + 2.0, + 0.5, + 1.0, + 4.0, + 4.0, + 2.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + ] + dof_pos_history = 3 * dof_pos + + +class LanderRunnerCfg(LeggedRobotRunnerCfg): + seed = -1 + runner_class_name = "OnPolicyRunner" + + class actor(LeggedRobotRunnerCfg.actor): + init_noise_std = 1.0 + hidden_dims = [512, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + smooth_exploration = False + + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + # "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_history", + # "sampled_history_dof_pos", + # "sampled_history_dof_vel", + # "sampled_history_dof_pos_target", + # "oscillator_obs", + ] + normalize_obs = True + + actions = ["dof_pos_target"] + disable_actions = False + + class noise: + dof_pos = 0.005 + dof_vel = 0.05 + base_ang_vel = 0.025 + base_lin_vel = 0.025 + projected_gravity = 0.01 + feet_contact_state = 0.025 + + class critic(LeggedRobotRunnerCfg.critic): + hidden_dims = [512, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_history", + ] + normalize_obs = True + + class reward: + class weights: + # tracking_lin_vel = 0.0 + # tracking_ang_vel = 1.5 + # orientation = 1.0 + torques = 5.0e-4 + # min_base_height = 1.5 + action_rate = 1e-3 + action_rate2 = 1e-3 + lin_vel_z = 0.0 + ang_vel_xy = 0.0 + # dof_vel = 0.25 + # stand_still = 0.25 + dof_pos_limits = 0.25 + dof_near_home = 0.25 + # stance = 1.0 + # swing = 1.0 + hips_forward = 0.0 + # walk_freq = 0.0 # 2.5 + + class termination_weight: + termination = 15 + + class algorithm(LeggedRobotRunnerCfg.algorithm): + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [1e-5, 1e-2] + lr_ratio = 1.5 + + class runner(LeggedRobotRunnerCfg.runner): + policy_class_name = "ActorCritic" + algorithm_class_name = "PPO2" + num_steps_per_env = 24 + max_iterations = 1000 + run_name = "Standing" + experiment_name = "Humanoid" + save_interval = 50 From fd308656c2aeaf81e02c79c1d19c22c976ebf02b Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Wed, 7 Aug 2024 14:17:00 -0400 Subject: [PATCH 14/26] unify humanoid and lander put jacobian into MIT_humanoid base environment, rather than import; importing breaks when running --original_config --- gym/envs/mit_humanoid/lander.py | 113 +++---------------------- gym/envs/mit_humanoid/lander_config.py | 36 ++++---- gym/envs/mit_humanoid/mit_humanoid.py | 99 ++++++++++------------ 3 files changed, 74 insertions(+), 174 deletions(-) diff --git a/gym/envs/mit_humanoid/lander.py b/gym/envs/mit_humanoid/lander.py index 5856efb8..972eb413 100644 --- a/gym/envs/mit_humanoid/lander.py 
+++ b/gym/envs/mit_humanoid/lander.py @@ -1,98 +1,13 @@ import torch -from gym.envs.base.legged_robot import LeggedRobot +# from gym.envs.base.legged_robot import LeggedRobot +from gym.envs.mit_humanoid.mit_humanoid import MIT_Humanoid -# from gym.envs.mit_humanoid.mit_humanoid import MIT_Humanoid -from .jacobian import _apply_coupling - -class Lander(LeggedRobot): +class Lander(MIT_Humanoid): def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): super().__init__(gym, sim, cfg, sim_params, sim_device, headless) - def _init_buffers(self): - super()._init_buffers() - self._init_sampled_history_buffers() - - def _init_sampled_history_buffers(self): - self.sampled_history_dof_pos_target = torch.zeros( - (self.num_envs, self.num_dof * self.cfg.env.sampled_history_length), - device=self.device, - ) - self.sampled_history_dof_pos = torch.zeros( - self.num_envs, - self.num_dof * self.cfg.env.sampled_history_length, - device=self.device, - ) - self.sampled_history_dof_vel = torch.zeros( - self.num_envs, - self.num_dof * self.cfg.env.sampled_history_length, - device=self.device, - ) - self.sampled_history_counter = torch.zeros( - self.num_envs, dtype=int, device=self.device - ) - self.sampled_history_threshold = int( - self.cfg.control.ctrl_frequency / self.cfg.env.sampled_history_frequency - ) - - def _reset_system(self, env_ids): - if len(env_ids) == 0: - return - super()._reset_system(env_ids) - - self._reset_sampled_history_buffers(env_ids) - return - - def _reset_sampled_history_buffers(self, ids): - n = self.cfg.env.sampled_history_length - self.sampled_history_dof_pos_target[ids] = self.dof_pos_target[ids].tile(n) - self.sampled_history_dof_pos[ids] = self.dof_pos_target[ids].tile(n) - self.sampled_history_dof_vel[ids] = self.dof_pos_target[ids].tile(n) - - # compute_torques accounting for coupling, and filtering torques - def _compute_torques(self): - torques = _apply_coupling( - self.dof_pos, - self.dof_vel, - self.dof_pos_target + self.default_dof_pos, - self.dof_vel_target, - self.p_gains, - self.d_gains, - self.tau_ff, - ) - torques = torques.clip(-self.torque_limits, self.torque_limits) - return torques - - def _post_decimation_step(self): - super()._post_decimation_step() - self._update_sampled_history_buffers() - - def _update_sampled_history_buffers(self): - self.sampled_history_counter += 1 - - ids = torch.nonzero( - self.sampled_history_counter == self.sampled_history_threshold, - as_tuple=False, - ).flatten() - - self.sampled_history_dof_pos_target[ids] = torch.roll( - self.sampled_history_dof_pos_target[ids], self.num_dof, dims=1 - ) # check - self.sampled_history_dof_pos_target[ids, : self.num_dof] = self.dof_pos_target[ - ids - ] - self.sampled_history_dof_pos[ids] = torch.roll( - self.sampled_history_dof_pos[ids], self.num_dof, dims=1 - ) # check - self.sampled_history_dof_pos[ids, : self.num_dof] = self.dof_pos_target[ids] - self.sampled_history_dof_vel[ids] = torch.roll( - self.sampled_history_dof_vel[ids], self.num_dof, dims=1 - ) # check - self.sampled_history_dof_vel[ids, : self.num_dof] = self.dof_pos_target[ids] - - self.sampled_history_counter[ids] = 0 - # --- rewards --- def _switch(self, mode=None): @@ -172,24 +87,18 @@ def _compute_grf(self, grf_norm=True): def smooth_sqr_wave(self, phase, sigma=0.2): # sigma=0 is step function return phase.sin() / (2 * torch.sqrt(phase.sin() ** 2.0 + sigma**2.0)) + 0.5 - def _reward_walk_freq(self): - # Penalize deviation from base frequency - return torch.mean( - self._sqrdexp( - (self.oscillator_freq - 
self.cfg.oscillator.base_frequency) - / self.cfg.oscillator.base_frequency - ), - dim=1, - ) * self._switch("move") - def _reward_hips_forward(self): # reward hip motors for pointing forward - hip_yaw_abad = torch.stack((self.dof_pos[:, 0:2], self.dof_pos[:, 5:7]), dim=1) - hip_yaw_abad -= torch.stack( + hip_yaw_abad = torch.cat((self.dof_pos[:, 0:2], self.dof_pos[:, 5:7]), dim=1) + hip_yaw_abad -= torch.cat( (self.default_dof_pos[:, 0:2], self.default_dof_pos[:, 5:7]), dim=1 ) - hip_yaw_abad /= torch.stack( - (self.scales["dof_pos"][0:2], self.scales["dof_pos"][5:7]), dim=1 + hip_yaw_abad /= torch.cat( + (self.scales["dof_pos"][0:2], self.scales["dof_pos"][5:7]) ) return (hip_yaw_abad).pow(2).mean(dim=1) # return self._sqrdexp(hip_yaw_abad).sum(dim=1).mean(dim=1) + + def _reward_power(self): + power = self.torques * self.dof_vel + return power.pow(2).mean(dim=1) diff --git a/gym/envs/mit_humanoid/lander_config.py b/gym/envs/mit_humanoid/lander_config.py index a4262f0d..0de34b4c 100644 --- a/gym/envs/mit_humanoid/lander_config.py +++ b/gym/envs/mit_humanoid/lander_config.py @@ -10,7 +10,7 @@ class LanderCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 4096 num_actuators = 18 - episode_length_s = 5 # episode length in seconds + episode_length_s = 10 # episode length in seconds sampled_history_length = 3 # n samples sampled_history_frequency = 10 # [Hz] @@ -71,7 +71,7 @@ class init_state(LeggedRobotCfg.init_state): root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [1.0, 1.5], # z + [0.64, 1.5], # z [-0.1, 0.1], # roll [-0.1, 0.1], # pitch [-0.1, 0.1], @@ -144,7 +144,7 @@ class asset(LeggedRobotCfg.asset): ) # foot_collisionbox_names = ["foot"] foot_name = "foot" - penalize_contacts_on = ["arm"] + penalize_contacts_on = ["arm", "hand", "shoulder"] terminate_after_contacts_on = ["base"] end_effector_names = ["hand", "foot"] # ?? 
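        # Note (assumption, following the usual LeggedRobot conventions): contacts
        # on the penalize_contacts_on bodies feed the `collision` penalty, while
        # terminate_after_contacts_on selects the termination contact indices
        # checked in _check_terminations_and_timeouts (the episode terminates once
        # their contact-force norm exceeds 1 N).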
flip_visual_attachments = False @@ -228,7 +228,7 @@ class actor(LeggedRobotRunnerCfg.actor): init_noise_std = 1.0 hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid - activation = "elu" + activation = "tanh" smooth_exploration = False obs = [ @@ -236,16 +236,15 @@ class actor(LeggedRobotRunnerCfg.actor): "base_lin_vel", "base_ang_vel", "projected_gravity", - # "commands", + "commands", "dof_pos_obs", "dof_vel", "dof_pos_history", - # "sampled_history_dof_pos", - # "sampled_history_dof_vel", - # "sampled_history_dof_pos_target", - # "oscillator_obs", + "sampled_history_dof_pos", + "sampled_history_dof_vel", + "sampled_history_dof_pos_target", ] - normalize_obs = True + normalize_obs = False actions = ["dof_pos_target"] disable_actions = False @@ -261,10 +260,10 @@ class noise: class critic(LeggedRobotRunnerCfg.critic): hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid - activation = "elu" + activation = "tanh" obs = [ - "base_height", + # "base_height", "base_lin_vel", "base_ang_vel", "projected_gravity", @@ -272,21 +271,25 @@ class critic(LeggedRobotRunnerCfg.critic): "dof_pos_obs", "dof_vel", "dof_pos_history", + "sampled_history_dof_pos", + "sampled_history_dof_vel", + "sampled_history_dof_pos_target", ] - normalize_obs = True + normalize_obs = False class reward: class weights: # tracking_lin_vel = 0.0 # tracking_ang_vel = 1.5 # orientation = 1.0 - torques = 5.0e-4 - # min_base_height = 1.5 + torques = 5.0e-5 + power = 0 # 1.0e-2 + min_base_height = 1.5 action_rate = 1e-3 action_rate2 = 1e-3 lin_vel_z = 0.0 ang_vel_xy = 0.0 - # dof_vel = 0.25 + dof_vel = 0.5 # stand_still = 0.25 dof_pos_limits = 0.25 dof_near_home = 0.25 @@ -294,6 +297,7 @@ class weights: # swing = 1.0 hips_forward = 0.0 # walk_freq = 0.0 # 2.5 + collision = 1.0 class termination_weight: termination = 15 diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 265f0ff9..a580fd57 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -1,8 +1,6 @@ import torch from gym.envs.base.legged_robot import LeggedRobot -from .jacobian import _apply_coupling -from gym.utils import exp_avg_filter class MIT_Humanoid(LeggedRobot): @@ -11,9 +9,6 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def _init_buffers(self): super()._init_buffers() - self.oscillators = torch.zeros(self.num_envs, 2, device=self.device) - self.oscillator_obs = torch.zeros(self.num_envs, 4, device=self.device) - self.oscillator_freq = torch.zeros(self.num_envs, 2, device=self.device) self._init_sampled_history_buffers() def _init_sampled_history_buffers(self): @@ -38,22 +33,51 @@ def _init_sampled_history_buffers(self): self.cfg.control.ctrl_frequency / self.cfg.env.sampled_history_frequency ) + def _apply_coupling(self, q, qd, q_des, qd_des, kp, kd, tau_ff): + # Create a Jacobian matrix and move it to the same device as input tensors + J = torch.eye(q.shape[-1]).to(q.device) + J[4, 3] = 1 + J[9, 8] = 1 + + # Perform transformations using Jacobian + q = torch.matmul(q, J.T) + qd = torch.matmul(qd, J.T) + q_des = torch.matmul(q_des, J.T) + qd_des = torch.matmul(qd_des, J.T) + + # Inverse of the transpose of Jacobian + J_inv_T = torch.inverse(J.T) + + # Compute feed-forward torques + tau_ff = torch.matmul(J_inv_T, tau_ff.T).T + + # Compute kp and kd terms + kp = torch.diagonal( + torch.matmul( + torch.matmul(J_inv_T, torch.diag_embed(kp, dim1=-2, dim2=-1)), J_inv_T.T + ), + dim1=-2, + 
dim2=-1, + ) + + kd = torch.diagonal( + torch.matmul( + torch.matmul(J_inv_T, torch.diag_embed(kd, dim1=-2, dim2=-1)), J_inv_T.T + ), + dim1=-2, + dim2=-1, + ) + + # Compute torques + torques = kp * (q_des - q) + kd * (qd_des - qd) + tau_ff + torques = torch.matmul(torques, J) + + return torques + def _reset_system(self, env_ids): if len(env_ids) == 0: return super()._reset_system(env_ids) - # reset oscillators, with a pi phase shift between left and right - self.oscillators[env_ids, 0] = ( - torch.rand(len(env_ids), device=self.device) * 2 * torch.pi - ) - self.oscillators[env_ids, 1] = self.oscillators[env_ids, 0] + torch.pi - self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) - # reset oscillator velocities to base freq - self.oscillator_freq[env_ids] = self.cfg.oscillator.base_frequency - # recompute oscillator observations - self.oscillator_obs = torch.cat( - (self.oscillators.cos(), self.oscillators.sin()), dim=1 - ) self._reset_sampled_history_buffers(env_ids) return @@ -65,7 +89,7 @@ def _reset_sampled_history_buffers(self, ids): # compute_torques accounting for coupling, and filtering torques def _compute_torques(self): - torques = _apply_coupling( + torques = self._apply_coupling( self.dof_pos, self.dof_vel, self.dof_pos_target + self.default_dof_pos, @@ -74,25 +98,12 @@ def _compute_torques(self): self.d_gains, self.tau_ff, ) - torques = torques.clip(-self.torque_limits, self.torque_limits) - return exp_avg_filter(torques, self.torques, self.cfg.control.filter_gain) - - # oscillator integration + return torques.clip(-self.torque_limits, self.torque_limits) def _post_decimation_step(self): super()._post_decimation_step() - self._step_oscillators() self._update_sampled_history_buffers() - def _step_oscillators(self, dt=None): - if dt is None: - dt = self.dt - self.oscillators += (self.oscillator_freq * 2 * torch.pi) * dt - self.oscillators = torch.remainder(self.oscillators, 2 * torch.pi) - self.oscillator_obs = torch.cat( - (torch.cos(self.oscillators), torch.sin(self.oscillators)), dim=1 - ) - def _update_sampled_history_buffers(self): self.sampled_history_counter += 1 @@ -187,20 +198,6 @@ def _reward_stand_still(self): rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) return rew_vel + rew_pos - rew_base_vel * self._switch("stand") - def _reward_stance(self): - # phase = torch.maximum( - # torch.zeros_like(self.oscillators), -self.oscillators.sin() - # ) # positive during swing, negative during stance - phase = self.smooth_sqr_wave(self.oscillators) - return (phase * self._compute_grf()).mean(dim=1) - - def _reward_swing(self): - # phase = torch.maximum( - # torch.zeros_like(self.oscillators), self.oscillators.sin() - # ) # positive during swing, negative during stance - phase = self.smooth_sqr_wave(self.oscillators + torch.pi) - return -(phase * self._compute_grf()).mean(dim=1) - def _compute_grf(self, grf_norm=True): grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) if grf_norm: @@ -211,16 +208,6 @@ def _compute_grf(self, grf_norm=True): def smooth_sqr_wave(self, phase, sigma=0.2): # sigma=0 is step function return phase.sin() / (2 * torch.sqrt(phase.sin() ** 2.0 + sigma**2.0)) + 0.5 - def _reward_walk_freq(self): - # Penalize deviation from base frequency - return torch.mean( - self._sqrdexp( - (self.oscillator_freq - self.cfg.oscillator.base_frequency) - / self.cfg.oscillator.base_frequency - ), - dim=1, - ) * self._switch("move") - def _reward_hips_forward(self): # reward hip motors for pointing forward hip_yaw_abad = 
torch.cat((self.dof_pos[:, 0:2], self.dof_pos[:, 5:7]), dim=1) From be2e990c6ec3d2887e3f89e856e66d5284f5bdad Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Wed, 4 Sep 2024 09:58:49 -0400 Subject: [PATCH 15/26] wip --- gym/envs/mit_humanoid/lander.py | 16 +++++++++++++++ gym/envs/mit_humanoid/lander_config.py | 28 +++++++++----------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/gym/envs/mit_humanoid/lander.py b/gym/envs/mit_humanoid/lander.py index 972eb413..f86d30f4 100644 --- a/gym/envs/mit_humanoid/lander.py +++ b/gym/envs/mit_humanoid/lander.py @@ -2,12 +2,28 @@ # from gym.envs.base.legged_robot import LeggedRobot from gym.envs.mit_humanoid.mit_humanoid import MIT_Humanoid +from isaacgym.torch_utils import torch_rand_float class Lander(MIT_Humanoid): def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + def _resample_commands(self, env_ids): + """Randommly select commands of some environments + + Args: + env_ids (List[int]): Environments ids for which new commands are needed + """ + if len(env_ids) == 0: + return + super()._resample_commands(env_ids) + # * with 75% chance, reset to 0 + self.commands[env_ids, :] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.25 + ).unsqueeze(1) + # --- rewards --- def _switch(self, mode=None): diff --git a/gym/envs/mit_humanoid/lander_config.py b/gym/envs/mit_humanoid/lander_config.py index 0de34b4c..8219d459 100644 --- a/gym/envs/mit_humanoid/lander_config.py +++ b/gym/envs/mit_humanoid/lander_config.py @@ -112,7 +112,7 @@ class control(LeggedRobotCfg.control): } # [N*m*s/rad] ctrl_frequency = 100 - desired_sim_frequency = 1000 + desired_sim_frequency = 500 # class oscillator: # base_frequency = 3.0 # [Hz] @@ -121,13 +121,13 @@ class commands: resampling_time = 10.0 # time before command are changed[s] class ranges: - lin_vel_x = [-0.0, 0.0] # min max [m/s] [-0.75, 0.75] - lin_vel_y = 0.0 # max [m/s] - yaw_vel = 0.0 # max [rad/s] + lin_vel_x = [-2.0, 2.0] # min max [m/s] [-0.75, 0.75] + lin_vel_y = 0.3 # max [m/s] + yaw_vel = 1.0 # max [rad/s] class push_robots: toggle = True - interval_s = 1 + interval_s = 2 max_push_vel_xy = 0.5 push_box_dims = [0.1, 0.1, 0.3] # x,y,z [m] @@ -146,13 +146,9 @@ class asset(LeggedRobotCfg.asset): foot_name = "foot" penalize_contacts_on = ["arm", "hand", "shoulder"] terminate_after_contacts_on = ["base"] - end_effector_names = ["hand", "foot"] # ?? 
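        # Note: total_mass (set a few lines below) is what _compute_grf() divides
        # the per-foot ground-reaction forces by before clamping to 1.0, so the
        # normalized GRF saturates once a foot carries more than total_mass newtons.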
flip_visual_attachments = False self_collisions = 0 # 1 to disagble, 0 to enable...bitwise filter collapse_fixed_joints = False - # * see GymDofDriveModeFlags - # * (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort) - default_dof_drive_mode = 3 fix_base_link = False disable_gravity = False disable_motors = False @@ -279,24 +275,18 @@ class critic(LeggedRobotRunnerCfg.critic): class reward: class weights: - # tracking_lin_vel = 0.0 - # tracking_ang_vel = 1.5 - # orientation = 1.0 torques = 5.0e-5 - power = 0 # 1.0e-2 + power = 1e-6 # 1.0e-2 min_base_height = 1.5 + lin_vel_xy = 1.0 action_rate = 1e-3 action_rate2 = 1e-3 lin_vel_z = 0.0 ang_vel_xy = 0.0 dof_vel = 0.5 - # stand_still = 0.25 dof_pos_limits = 0.25 - dof_near_home = 0.25 - # stance = 1.0 - # swing = 1.0 + dof_near_home = 0.75 hips_forward = 0.0 - # walk_freq = 0.0 # 2.5 collision = 1.0 class termination_weight: @@ -330,6 +320,6 @@ class runner(LeggedRobotRunnerCfg.runner): algorithm_class_name = "PPO2" num_steps_per_env = 24 max_iterations = 1000 - run_name = "Standing" + run_name = "lander" experiment_name = "Humanoid" save_interval = 50 From 27991100e4388ea3df415b75f4b022a599747b51 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Wed, 4 Sep 2024 15:42:46 -0400 Subject: [PATCH 16/26] Refactor for action frequency handled in runner only for lander: needs to overload _check_terminations_and_timeouts to not actually reset so that reset can be handled only by runner (without breaking backwards compatibility) Also keeps a list of rewards, for finer reward integration, and can also do traj stats --- gym/envs/mit_humanoid/lander.py | 7 ++ gym/envs/mit_humanoid/lander_config.py | 15 +-- gym/utils/helpers.py | 2 +- learning/runners/my_runner.py | 128 +++++++++++++++++++++---- 4 files changed, 127 insertions(+), 25 deletions(-) diff --git a/gym/envs/mit_humanoid/lander.py b/gym/envs/mit_humanoid/lander.py index f86d30f4..86d37321 100644 --- a/gym/envs/mit_humanoid/lander.py +++ b/gym/envs/mit_humanoid/lander.py @@ -24,6 +24,13 @@ def _resample_commands(self, env_ids): < 0.25 ).unsqueeze(1) + def _check_terminations_and_timeouts(self): + """Check if environments need to be reset""" + contact_forces = self.contact_forces[:, self.termination_contact_indices, :] + self.terminated |= torch.any(torch.norm(contact_forces, dim=-1) > 1.0, dim=1) + self.timed_out = self.episode_length_buf >= self.max_episode_length + # self.to_be_reset = self.timed_out | self.terminated + # --- rewards --- def _switch(self, mode=None): diff --git a/gym/envs/mit_humanoid/lander_config.py b/gym/envs/mit_humanoid/lander_config.py index 8219d459..48c2eb53 100644 --- a/gym/envs/mit_humanoid/lander_config.py +++ b/gym/envs/mit_humanoid/lander_config.py @@ -111,7 +111,7 @@ class control(LeggedRobotCfg.control): "elbow": 1.0, } # [N*m*s/rad] - ctrl_frequency = 100 + ctrl_frequency = 500 desired_sim_frequency = 500 # class oscillator: @@ -218,13 +218,15 @@ class scaling(LeggedRobotCfg.scaling): class LanderRunnerCfg(LeggedRobotRunnerCfg): seed = -1 - runner_class_name = "OnPolicyRunner" + runner_class_name = "MyRunner" class actor(LeggedRobotRunnerCfg.actor): + frequency = 100 init_noise_std = 1.0 hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid - activation = "tanh" + activation = ["elu", "elu", "tanh"] + layer_norm = True smooth_exploration = False obs = [ @@ -256,7 +258,8 @@ class noise: class critic(LeggedRobotRunnerCfg.critic): hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid - activation = "tanh" 
+ activation = "elu" + layer_norm = True obs = [ # "base_height", @@ -290,7 +293,7 @@ class weights: collision = 1.0 class termination_weight: - termination = 15 + termination = 1.0 class algorithm(LeggedRobotRunnerCfg.algorithm): # both @@ -319,7 +322,7 @@ class runner(LeggedRobotRunnerCfg.runner): policy_class_name = "ActorCritic" algorithm_class_name = "PPO2" num_steps_per_env = 24 - max_iterations = 1000 + max_iterations = 500 run_name = "lander" experiment_name = "Humanoid" save_interval = 50 diff --git a/gym/utils/helpers.py b/gym/utils/helpers.py index 90af8bce..8e74d93c 100644 --- a/gym/utils/helpers.py +++ b/gym/utils/helpers.py @@ -191,7 +191,7 @@ def get_args(custom_parameters=None): { "name": "--task", "type": str, - "default": "cartpole", + "default": "lander", "help": "Resume training or start testing from a checkpoint. " "Overrides config file if provided.", }, diff --git a/learning/runners/my_runner.py b/learning/runners/my_runner.py index d97dee8d..96eedc77 100644 --- a/learning/runners/my_runner.py +++ b/learning/runners/my_runner.py @@ -19,14 +19,18 @@ def __init__(self, env, train_cfg, device="cpu"): self.device, ) - def learn(self): + def learn(self, states_to_log_dict=None): self.set_up_logger() + n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) + assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" + rewards_dict = {} + rewards_list = [dict() for x in range(n_policy_steps)] self.alg.switch_to_train() - actor_obs = self.get_obs(self.actor_cfg["actor_obs"]) - critic_obs = self.get_obs(self.critic_cfg["critic_obs"]) + actor_obs = self.get_obs(self.actor_cfg["obs"]) + critic_obs = self.get_obs(self.critic_cfg["obs"]) tot_iter = self.it + self.num_learning_iterations self.save() @@ -35,8 +39,10 @@ def learn(self): transition.update( { "actor_obs": actor_obs, - "actions": self.alg.act(actor_obs, critic_obs), + "next_actor_obs": actor_obs, + "actions": self.alg.act(actor_obs), "critic_obs": critic_obs, + "next_critic_obs": critic_obs, "rewards": self.get_rewards({"termination": 0.0})["termination"], "dones": self.get_timed_out(), } @@ -52,10 +58,17 @@ def learn(self): for self.it in range(self.it + 1, tot_iter + 1): logger.tic("iteration") logger.tic("collection") + + # * Simulate environment and log states + if states_to_log_dict is not None: + it_idx = self.it - 1 + if it_idx % 10 == 0: + self.sim_and_log_states(states_to_log_dict, it_idx) + # * Rollout with torch.inference_mode(): for i in range(self.num_steps_per_env): - actions = self.alg.act(actor_obs, critic_obs) + actions = self.alg.act(actor_obs) self.set_actions( self.actor_cfg["actions"], actions, @@ -69,33 +82,48 @@ def learn(self): "critic_obs": critic_obs, } ) - - self.env.step() + for step in range(n_policy_steps): + self.env.step() + # put reward integration here + self.update_rewards(rewards_list[step], self.env.terminated) + # handle time-outs/terminations happening inside here with mask + else: + # catch and reset failed envs + to_be_reset = self.env.timed_out | self.env.terminated + env_ids = (to_be_reset).nonzero(as_tuple=False).flatten() + self.env._reset_idx(env_ids) actor_obs = self.get_noisy_obs( - self.actor_cfg["actor_obs"], self.actor_cfg["noise"] + self.actor_cfg["obs"], self.actor_cfg["noise"] ) - critic_obs = self.get_obs(self.critic_cfg["critic_obs"]) + critic_obs = self.get_obs(self.critic_cfg["obs"]) + # * get time_outs - timed_out = self.get_timed_out() - terminated = self.get_terminated() - dones = timed_out | terminated - 
self.update_rewards(rewards_dict, terminated) - total_rewards = torch.stack(tuple(rewards_dict.values())).sum(dim=0) + self.update_rewards(rewards_dict, self.env.terminated) + + # integrate rewards from list + total_rewards = torch.stack( + tuple( + torch.stack(tuple(rewards_dict.values())) + for rewards_dict in rewards_list + ) + ).sum(dim=(0, 1)) transition.update( { + "next_actor_obs": actor_obs, + "next_critic_obs": critic_obs, "rewards": total_rewards, - "timed_out": timed_out, - "dones": dones, + "timed_out": self.env.timed_out, + "dones": self.env.timed_out | self.env.terminated, } ) storage.add_transitions(transition) logger.log_rewards(rewards_dict) logger.log_rewards({"total_rewards": total_rewards}) - logger.finish_step(dones) + logger.finish_step(self.env.timed_out | self.env.terminated) logger.toc("collection") logger.tic("learning") @@ -113,6 +141,21 @@ def learn(self): self.save() self.save() + @torch.no_grad + def burn_in_normalization(self, n_iterations=100): + actor_obs = self.get_obs(self.actor_cfg["obs"]) + critic_obs = self.get_obs(self.critic_cfg["obs"]) + for _ in range(n_iterations): + actions = self.alg.act(actor_obs) + self.set_actions(self.actor_cfg["actions"], actions) + self.env.step() + actor_obs = self.get_noisy_obs( + self.actor_cfg["obs"], self.actor_cfg["noise"] + ) + critic_obs = self.get_obs(self.critic_cfg["obs"]) + self.alg.critic.evaluate(critic_obs) + self.env.reset() + def set_up_logger(self): logger.register_rewards(list(self.critic_cfg["reward"]["weights"].keys())) logger.register_rewards( @@ -120,8 +163,57 @@ def set_up_logger(self): ) logger.register_rewards(["total_rewards"]) logger.register_category( - "algorithm", self.alg, ["mean_value_loss", "mean_surrogate_loss"] + "algorithm", + self.alg, + ["mean_value_loss", "mean_surrogate_loss", "learning_rate"], ) logger.register_category("actor", self.alg.actor, ["action_std", "entropy"]) logger.attach_torch_obj_to_wandb((self.alg.actor, self.alg.critic)) + + def update_rewards(self, rewards_dict, terminated): + # sum existing rewards with new rewards + + rewards_dict.update( + self.get_rewards( + self.critic_cfg["reward"]["termination_weight"], mask=terminated + ) + ) + rewards_dict.update( + self.get_rewards( + self.critic_cfg["reward"]["weights"], + modifier=self.env.dt, + mask=~terminated, + ) + ) + + def sim_and_log_states(self, states_to_log_dict, it_idx): + # Simulate environment for as many steps as expected in the dict. + # Log states to the dict, as well as whether the env terminated. 
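+        # Assumed layout (not stated in this patch): each entry of
+        # states_to_log_dict is a tensor of shape
+        # (num_logged_envs, num_logged_iterations, steps, state_dim),
+        # and a "terminated" entry must exist, since its third
+        # dimension fixes how many sim steps are rolled out below.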
+ steps = states_to_log_dict["terminated"].shape[2] + actor_obs = self.get_obs(self.policy_cfg["actor_obs"]) + + with torch.inference_mode(): + for i in range(steps): + actions = self.alg.act(actor_obs) + self.set_actions( + self.policy_cfg["actions"], + actions, + self.policy_cfg["disable_actions"], + ) + + self.env.step() + + actor_obs = self.get_noisy_obs( + self.policy_cfg["actor_obs"], self.policy_cfg["noise"] + ) + + # Log states (just for the first env) + terminated = self.get_terminated()[0] + for state in states_to_log_dict: + if state == "terminated": + states_to_log_dict[state][0, it_idx, i, :] = terminated + else: + states_to_log_dict[state][0, it_idx, i, :] = getattr( + self.env, state + )[0, :] From f105c0eabc784fa5f93e487f29a78fe752d3af47 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Mon, 9 Sep 2024 09:31:47 -0400 Subject: [PATCH 17/26] update nn_params --- gym/envs/pendulum/pendulum_SAC_config.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/gym/envs/pendulum/pendulum_SAC_config.py b/gym/envs/pendulum/pendulum_SAC_config.py index 0125cee4..836a6185 100644 --- a/gym/envs/pendulum/pendulum_SAC_config.py +++ b/gym/envs/pendulum/pendulum_SAC_config.py @@ -43,10 +43,7 @@ class actor(FixedRobotCfgPPO.actor): nn_params = {"latent": latent_nn, "mean": mean_nn, "std": std_nn} normalize_obs = False - obs = [ - "dof_pos_obs", - "dof_vel", - ] + obs = ["dof_pos_obs", "dof_vel"] actions = ["tau_ff"] disable_actions = False From c04b563f03a540a599fdee1522667e1eb4b7dda9 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Mon, 9 Sep 2024 14:06:18 -0400 Subject: [PATCH 18/26] apply stash w/ sim-freq reward sampling. Adjust some reward weights of action_rate (mini-cheetah) to work decently. --- gym/envs/base/task_skeleton.py | 8 +- .../mini_cheetah/mini_cheetah_osc_config.py | 7 +- gym/envs/mini_cheetah/mini_cheetah_ref.py | 9 ++ .../mini_cheetah/mini_cheetah_ref_config.py | 9 +- gym/envs/mit_humanoid/lander_config.py | 2 +- gym/utils/task_registry.py | 3 + learning/runners/my_runner.py | 57 +++++++---- learning/runners/off_policy_runner.py | 95 ++++++++++++------- learning/runners/on_policy_runner.py | 69 +++++++++----- 9 files changed, 178 insertions(+), 81 deletions(-) diff --git a/gym/envs/base/task_skeleton.py b/gym/envs/base/task_skeleton.py index 0974970a..4f931eab 100644 --- a/gym/envs/base/task_skeleton.py +++ b/gym/envs/base/task_skeleton.py @@ -5,10 +5,14 @@ class TaskSkeleton: - def __init__(self, num_envs=1, max_episode_length=1.0, device="cpu"): + def __init__( + self, num_envs=1, max_episode_length=1.0, device="cpu", auto_reset=True + ): self.num_envs = num_envs self.max_episode_length = max_episode_length self.device = device + self.auto_reset = auto_reset + return None def get_states(self, obs_list): @@ -70,7 +74,7 @@ def _check_terminations_and_timeouts(self): contact_forces = self.contact_forces[:, self.termination_contact_indices, :] self.terminated |= torch.any(torch.norm(contact_forces, dim=-1) > 1.0, dim=1) self.timed_out = self.episode_length_buf >= self.max_episode_length - self.to_be_reset = self.timed_out | self.terminated + # self.to_be_reset = self.timed_out | self.terminated def step(self, actions): raise NotImplementedError diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py index ec283033..34996404 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py @@ -64,7 +64,7 @@ class control(MiniCheetahCfg.control): # * PD 
Drive parameters: stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0} damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} - ctrl_frequency = 100 + ctrl_frequency = 500 desired_sim_frequency = 500 class osc: @@ -163,6 +163,7 @@ class MiniCheetahOscRunnerCfg(MiniCheetahRunnerCfg): runner_class_name = "OnPolicyRunner" class actor(MiniCheetahRunnerCfg.actor): + frequency = 100 hidden_dims = [256, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" @@ -221,8 +222,8 @@ class weights: dof_vel = 0.0 min_base_height = 1.5 collision = 0 - action_rate = 0.1 # -0.01 - action_rate2 = 0.01 # -0.001 + action_rate = 0.001 # -0.01 + action_rate2 = 0.0001 # -0.001 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref.py b/gym/envs/mini_cheetah/mini_cheetah_ref.py index 57e1f0e7..d3f8a89b 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref.py @@ -50,6 +50,15 @@ def _resample_commands(self, env_ids): ).squeeze(1) self.commands[env_ids, :3] *= (rand_ids < 0.9).unsqueeze(1) + def _check_terminations_and_timeouts(self): + """Check if environments need to be reset""" + contact_forces = self.contact_forces[:, self.termination_contact_indices, :] + self.terminated |= torch.any(torch.norm(contact_forces, dim=-1) > 1.0, dim=1) + self.timed_out = self.episode_length_buf >= self.max_episode_length + # self.to_be_reset = self.timed_out | self.terminated + + # --- + def _switch(self): c_vel = torch.linalg.norm(self.commands, dim=1) return torch.exp( diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py index 1803c765..b8a7a2fe 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py @@ -26,7 +26,7 @@ class control(MiniCheetahCfg.control): stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0} damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} gait_freq = 3.0 - ctrl_frequency = 100 + ctrl_frequency = 500 desired_sim_frequency = 500 class commands(MiniCheetahCfg.commands): @@ -70,6 +70,7 @@ class MiniCheetahRefRunnerCfg(MiniCheetahRunnerCfg): runner_class_name = "OnPolicyRunner" class actor(MiniCheetahRunnerCfg.actor): + frequency = 100 hidden_dims = [256, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" @@ -122,12 +123,12 @@ class weights: lin_vel_z = 0.0 ang_vel_xy = 0.01 orientation = 1.0 - torques = 5.0e-7 + torques = 5.0e-8 dof_vel = 0.0 min_base_height = 1.5 collision = 0.0 - action_rate = 0.1 - action_rate2 = 0.01 + action_rate = 1e-4 + action_rate2 = 1e-5 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 diff --git a/gym/envs/mit_humanoid/lander_config.py b/gym/envs/mit_humanoid/lander_config.py index 48c2eb53..86f3329f 100644 --- a/gym/envs/mit_humanoid/lander_config.py +++ b/gym/envs/mit_humanoid/lander_config.py @@ -282,7 +282,7 @@ class weights: power = 1e-6 # 1.0e-2 min_base_height = 1.5 lin_vel_xy = 1.0 - action_rate = 1e-3 + action_rate = 1e-2 action_rate2 = 1e-3 lin_vel_z = 0.0 ang_vel_xy = 0.0 diff --git a/gym/utils/task_registry.py b/gym/utils/task_registry.py index 6c2025ba..cc0787ca 100644 --- a/gym/utils/task_registry.py +++ b/gym/utils/task_registry.py @@ -178,6 +178,9 @@ def set_control_and_sim_dt(self, env_cfg, train_cfg): f" to {env_cfg.sim_dt}." 
) + if not hasattr(train_cfg.actor, "frequency"): + train_cfg.actor.frequency = env_cfg.control.ctrl_frequency + def set_discount_rates(self, train_cfg, dt): if hasattr(train_cfg.algorithm, "discount_horizon"): hrzn = train_cfg.algorithm.discount_horizon diff --git a/learning/runners/my_runner.py b/learning/runners/my_runner.py index 15192d2d..4c2f5d52 100644 --- a/learning/runners/my_runner.py +++ b/learning/runners/my_runner.py @@ -39,8 +39,7 @@ def learn(self, states_to_log_dict=None): n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" - rewards_dict = {} - rewards_list = [dict() for x in range(n_policy_steps)] + rewards_dict = self.initialize_rewards_dict(n_policy_steps) self.alg.switch_to_train() actor_obs = self.get_obs(self.actor_cfg["obs"]) @@ -104,31 +103,21 @@ def learn(self, states_to_log_dict=None): for step in range(n_policy_steps): self.env.step() # put reward integration here - self.update_rewards(rewards_list[step], self.env.terminated) - # handle time-outs/terminations happening inside here with mask + self.update_rewards_dict(rewards_dict, step) else: # catch and reset failed envs to_be_reset = self.env.timed_out | self.env.terminated env_ids = (to_be_reset).nonzero(as_tuple=False).flatten() self.env._reset_idx(env_ids) + total_rewards = torch.stack( + tuple(rewards_dict.sum(dim=0).values()) + ).sum(dim=(0)) actor_obs = self.get_noisy_obs( self.actor_cfg["obs"], self.actor_cfg["noise"] ) critic_obs = self.get_obs(self.critic_cfg["obs"]) - # * get time_outs - - self.update_rewards(rewards_dict, self.env.terminated) - - # integrate rewards from list - total_rewards = torch.stack( - tuple( - torch.stack(tuple(rewards_dict.values())) - for rewards_dict in rewards_list - ) - ).sum(dim=(0, 1)) - transition.update( { "next_actor_obs": actor_obs, @@ -140,7 +129,7 @@ def learn(self, states_to_log_dict=None): ) storage.add_transitions(transition) - logger.log_rewards(rewards_dict) + logger.log_rewards(rewards_dict.sum(dim=0)) logger.log_rewards({"total_rewards": total_rewards}) logger.finish_step(self.env.timed_out | self.env.terminated) logger.toc("collection") @@ -205,6 +194,40 @@ def update_rewards(self, rewards_dict, terminated): ) ) + def initialize_rewards_dict(self, n_steps): + # sum existing rewards with new rewards + rewards_dict = TensorDict( + {}, batch_size=(n_steps, self.env.num_envs), device=self.device + ) + for key in self.critic_cfg["reward"]["termination_weight"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + for key in self.critic_cfg["reward"]["weights"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + return rewards_dict + + def update_rewards_dict(self, rewards_dict, step): + # sum existing rewards with new rewards + rewards_dict[step].update( + self.get_rewards( + self.critic_cfg["reward"]["termination_weight"], + modifier=self.env.dt, + mask=self.env.terminated, + ), + inplace=True, + ) + rewards_dict[step].update( + self.get_rewards( + self.critic_cfg["reward"]["weights"], + modifier=self.env.dt, + mask=~self.env.terminated, + ), + inplace=True, + ) + def sim_and_log_states(self, states_to_log_dict, it_idx): # Simulate environment for as many steps as expected in the dict. # Log states to the dict, as well as whether the env terminated. 
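For orientation, a minimal self-contained sketch of the reward-integration pattern these runner diffs introduce: the policy acts once every n_policy_steps = ctrl_frequency / actor_frequency sim steps (500 Hz / 100 Hz in the configs above), per-step rewards go into per-term buffers of shape (n_policy_steps, num_envs) — the runners use a TensorDict for this — and the reward credited to the policy step is the sum over sim steps and terms. The toy env interactions and the flat -1.0 termination penalty below are placeholders, not code from the repo.

import torch

num_envs, dt = 4, 1.0 / 500          # sim/ctrl step at 500 Hz
actor_frequency = 100                # policy acts at 100 Hz
n_policy_steps = int((1 / dt) / actor_frequency)  # -> 5

reward_names = ["torques", "min_base_height"]     # example reward terms
rewards = {k: torch.zeros(n_policy_steps, num_envs) for k in reward_names}
termination = torch.zeros(n_policy_steps, num_envs)

for step in range(n_policy_steps):
    # stand-ins for env.step() and the env's per-term reward functions
    terminated = torch.rand(num_envs) < 0.01
    step_rewards = {k: torch.randn(num_envs) for k in reward_names}

    # running terms are masked on terminated envs and scaled by dt
    for k in reward_names:
        rewards[k][step] = ~terminated * step_rewards[k] * dt
    # a termination penalty is applied only on the step an env fails
    termination[step] = terminated * -1.0

# total reward for this policy step: sum over sim steps and reward terms
total_rewards = torch.stack([r.sum(dim=0) for r in rewards.values()]).sum(dim=0)
total_rewards += termination.sum(dim=0)
print(total_rewards.shape)  # torch.Size([4])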
diff --git a/learning/runners/off_policy_runner.py b/learning/runners/off_policy_runner.py index 7aed58e7..516c4a3e 100644 --- a/learning/runners/off_policy_runner.py +++ b/learning/runners/off_policy_runner.py @@ -48,7 +48,10 @@ def _set_up_alg(self): def learn(self): self.set_up_logger() - rewards_dict = {} + n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) + assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" + + rewards_dict = self.initialize_rewards_dict(n_policy_steps) self.alg.switch_to_train() actor_obs = self.get_obs(self.actor_cfg["obs"]) @@ -94,28 +97,32 @@ def learn(self): } ) - self.env.step() + for step in range(n_policy_steps): + self.env.step() + # put reward integration here + self.update_rewards_dict(rewards_dict, step) + else: + # catch and reset failed envs + to_be_reset = self.env.timed_out | self.env.terminated + env_ids = (to_be_reset).nonzero(as_tuple=False).flatten() + self.env._reset_idx(env_ids) + + total_rewards = torch.stack( + tuple(rewards_dict.sum(dim=0).values()) + ).sum(dim=(0)) actor_obs = self.get_noisy_obs( self.actor_cfg["obs"], self.actor_cfg["noise"] ) critic_obs = self.get_obs(self.critic_cfg["obs"]) - # * get time_outs - timed_out = self.get_timed_out() - terminated = self.get_terminated() - dones = timed_out | terminated - - self.update_rewards(rewards_dict, terminated) - total_rewards = torch.stack(tuple(rewards_dict.values())).sum(dim=0) - transition.update( { "next_actor_obs": actor_obs, "next_critic_obs": critic_obs, "rewards": total_rewards, - "timed_out": timed_out, - "dones": dones, + "timed_out": self.env.timed_out, + "dones": self.env.timed_out | self.env.terminated, } ) storage.add_transitions(transition) @@ -147,35 +154,39 @@ def learn(self): } ) - self.env.step() + for step in range(n_policy_steps): + self.env.step() + # put reward integration here + self.update_rewards_dict(rewards_dict, step) + else: + # catch and reset failed envs + to_be_reset = self.env.timed_out | self.env.terminated + env_ids = (to_be_reset).nonzero(as_tuple=False).flatten() + self.env._reset_idx(env_ids) + + total_rewards = torch.stack( + tuple(rewards_dict.sum(dim=0).values()) + ).sum(dim=(0)) actor_obs = self.get_noisy_obs( self.actor_cfg["obs"], self.actor_cfg["noise"] ) critic_obs = self.get_obs(self.critic_cfg["obs"]) - # * get time_outs - timed_out = self.get_timed_out() - terminated = self.get_terminated() - dones = timed_out | terminated - - self.update_rewards(rewards_dict, terminated) - total_rewards = torch.stack(tuple(rewards_dict.values())).sum(dim=0) - transition.update( { "next_actor_obs": actor_obs, "next_critic_obs": critic_obs, "rewards": total_rewards, - "timed_out": timed_out, - "dones": dones, + "timed_out": self.env.timed_out, + "dones": self.env.timed_out | self.env.terminated, } ) storage.add_transitions(transition) - logger.log_rewards(rewards_dict) + logger.log_rewards(rewards_dict.sum(dim=0)) logger.log_rewards({"total_rewards": total_rewards}) - logger.finish_step(dones) + logger.finish_step(self.env.timed_out | self.env.terminated) logger.toc("collection") logger.tic("learning") @@ -192,20 +203,40 @@ def learn(self): self.save() self.save() - def update_rewards(self, rewards_dict, terminated): - rewards_dict.update( + def update_rewards_dict(self, rewards_dict, step): + # sum existing rewards with new rewards + rewards_dict[step].update( self.get_rewards( - self.critic_cfg["reward"]["termination_weight"], mask=terminated - ) + self.critic_cfg["reward"]["termination_weight"], + 
modifier=self.env.dt, + mask=self.env.terminated, + ), + inplace=True, ) - rewards_dict.update( + rewards_dict[step].update( self.get_rewards( self.critic_cfg["reward"]["weights"], modifier=self.env.dt, - mask=~terminated, - ) + mask=~self.env.terminated, + ), + inplace=True, ) + def initialize_rewards_dict(self, n_steps): + # sum existing rewards with new rewards + rewards_dict = TensorDict( + {}, batch_size=(n_steps, self.env.num_envs), device=self.device + ) + for key in self.critic_cfg["reward"]["termination_weight"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + for key in self.critic_cfg["reward"]["weights"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + return rewards_dict + def set_up_logger(self): logger.register_rewards(list(self.critic_cfg["reward"]["weights"].keys())) logger.register_rewards( diff --git a/learning/runners/on_policy_runner.py b/learning/runners/on_policy_runner.py index 0a81680d..7167f03e 100644 --- a/learning/runners/on_policy_runner.py +++ b/learning/runners/on_policy_runner.py @@ -24,7 +24,10 @@ def __init__(self, env, train_cfg, device="cpu"): def learn(self, states_to_log_dict=None): self.set_up_logger() - rewards_dict = {} + n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) + assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" + + rewards_dict = self.initialize_rewards_dict(n_policy_steps) self.alg.switch_to_train() actor_obs = self.get_obs(self.actor_cfg["obs"]) @@ -86,37 +89,39 @@ def learn(self, states_to_log_dict=None): "critic_obs": critic_obs, } ) + for step in range(n_policy_steps): + self.env.step() + # put reward integration here + self.update_rewards_dict(rewards_dict, step) + else: + # catch and reset failed envs + to_be_reset = self.env.timed_out | self.env.terminated + env_ids = (to_be_reset).nonzero(as_tuple=False).flatten() + self.env._reset_idx(env_ids) - self.env.step() + total_rewards = torch.stack( + tuple(rewards_dict.sum(dim=0).values()) + ).sum(dim=(0)) actor_obs = self.get_noisy_obs( self.actor_cfg["obs"], self.actor_cfg["noise"] ) critic_obs = self.get_obs(self.critic_cfg["obs"]) - # * get time_outs - timed_out = self.get_timed_out() - terminated = self.get_terminated() - dones = timed_out | terminated - - self.update_rewards(rewards_dict, terminated) - total_rewards = torch.stack(tuple(rewards_dict.values())).sum(dim=0) - transition.update( { "next_actor_obs": actor_obs, "next_critic_obs": critic_obs, "rewards": total_rewards, - "timed_out": timed_out, - "terminated": terminated, - "dones": dones, + "timed_out": self.env.timed_out, + "dones": self.env.timed_out | self.env.terminated, } ) storage.add_transitions(transition) - logger.log_rewards(rewards_dict) + logger.log_rewards(rewards_dict.sum(dim=0)) logger.log_rewards({"total_rewards": total_rewards}) - logger.finish_step(dones) + logger.finish_step(self.env.timed_out | self.env.terminated) logger.toc("collection") logger.tic("learning") @@ -149,19 +154,39 @@ def burn_in_normalization(self, n_iterations=100): self.alg.critic.evaluate(critic_obs) self.env.reset() - def update_rewards(self, rewards_dict, terminated): - rewards_dict.update( + def update_rewards_dict(self, rewards_dict, step): + # sum existing rewards with new rewards + rewards_dict[step].update( self.get_rewards( - self.critic_cfg["reward"]["termination_weight"], mask=terminated - ) + self.critic_cfg["reward"]["termination_weight"], + modifier=self.env.dt, + 
mask=self.env.terminated, + ), + inplace=True, ) - rewards_dict.update( + rewards_dict[step].update( self.get_rewards( self.critic_cfg["reward"]["weights"], modifier=self.env.dt, - mask=~terminated, - ) + mask=~self.env.terminated, + ), + inplace=True, + ) + + def initialize_rewards_dict(self, n_steps): + # sum existing rewards with new rewards + rewards_dict = TensorDict( + {}, batch_size=(n_steps, self.env.num_envs), device=self.device ) + for key in self.critic_cfg["reward"]["termination_weight"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + for key in self.critic_cfg["reward"]["weights"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + return rewards_dict def set_up_logger(self): logger.register_rewards(list(self.critic_cfg["reward"]["weights"].keys())) From e3956738f7ae709874a889b7868abd5fc38dd3f3 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Mon, 9 Sep 2024 14:23:38 -0400 Subject: [PATCH 19/26] refactor skeleton with a super_init fix, and pre-initialize reward buffer. Doesn't seem to make a difference in speed. --- gym/envs/base/base_task.py | 19 +++---------------- gym/envs/base/task_skeleton.py | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/gym/envs/base/base_task.py b/gym/envs/base/base_task.py index 551143a3..033e0eeb 100644 --- a/gym/envs/base/base_task.py +++ b/gym/envs/base/base_task.py @@ -18,33 +18,20 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): # * env device is GPU only if sim is on GPU and use_gpu_pipeline=True, # * otherwise returned tensors are copied to CPU by physX. if sim_device_type == "cuda" and sim_params.use_gpu_pipeline: - self.device = self.sim_device + device = self.sim_device else: - self.device = "cpu" + device = "cpu" # * graphics device for rendering, -1 for no rendering self.graphics_device_id = self.sim_device_id - self.num_envs = cfg.env.num_envs self.num_actuators = cfg.env.num_actuators # * optimization flags for pytorch JIT torch._C._jit_set_profiling_mode(False) torch._C._jit_set_profiling_executor(False) - # allocate buffers - self.to_be_reset = torch.ones( - self.num_envs, device=self.device, dtype=torch.bool - ) - self.terminated = torch.ones( - self.num_envs, device=self.device, dtype=torch.bool - ) - self.episode_length_buf = torch.zeros( - self.num_envs, device=self.device, dtype=torch.long - ) - self.timed_out = torch.zeros( - self.num_envs, device=self.device, dtype=torch.bool - ) + super().__init__(num_envs=cfg.env.num_envs, device=device) # todo: read from config self.enable_viewer_sync = True diff --git a/gym/envs/base/task_skeleton.py b/gym/envs/base/task_skeleton.py index 4f931eab..d3585e45 100644 --- a/gym/envs/base/task_skeleton.py +++ b/gym/envs/base/task_skeleton.py @@ -5,13 +5,15 @@ class TaskSkeleton: - def __init__( - self, num_envs=1, max_episode_length=1.0, device="cpu", auto_reset=True - ): + def __init__(self, num_envs=1, device="cpu"): self.num_envs = num_envs - self.max_episode_length = max_episode_length self.device = device - self.auto_reset = auto_reset + + self.to_be_reset = torch.ones(num_envs, device=device, dtype=torch.bool) + self.terminated = torch.ones(num_envs, device=device, dtype=torch.bool) + self.episode_length_buf = torch.zeros(num_envs, device=device, dtype=torch.long) + self.timed_out = torch.zeros(num_envs, device=device, dtype=torch.bool) + self.reward_buf = torch.zeros(num_envs, device=device, dtype=torch.float) return None @@ 
-61,10 +63,11 @@ def compute_reward(self, reward_weights): reward_weights: dict with keys matching reward names, and values matching weights """ - reward = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) + # reward = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) + self.reward_buf[:] = 0 for name, weight in reward_weights.items(): - reward += weight * self._eval_reward(name) - return reward + self.reward_buf += weight * self._eval_reward(name) + return self.reward_buf def _eval_reward(self, name): return eval("self._reward_" + name + "()") From ea5cdff304ef6edc78dbd66e4ecfcd9d7d4cccb1 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Mon, 9 Sep 2024 15:09:09 -0400 Subject: [PATCH 20/26] refactor: redo rewards computation, with a dict of reward functions in the runner. Seems to make a tiny difference in speed. More importantly, it removes the `eval` hack, which means we can profile individual reward functions. THIS BREAKS PBRS --- gym/envs/base/task_skeleton.py | 15 --------------- learning/runners/BaseRunner.py | 15 +++++++++++---- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/gym/envs/base/task_skeleton.py b/gym/envs/base/task_skeleton.py index d3585e45..4dfcb280 100644 --- a/gym/envs/base/task_skeleton.py +++ b/gym/envs/base/task_skeleton.py @@ -13,7 +13,6 @@ def __init__(self, num_envs=1, device="cpu"): self.terminated = torch.ones(num_envs, device=device, dtype=torch.bool) self.episode_length_buf = torch.zeros(num_envs, device=device, dtype=torch.long) self.timed_out = torch.zeros(num_envs, device=device, dtype=torch.bool) - self.reward_buf = torch.zeros(num_envs, device=device, dtype=torch.float) return None @@ -58,20 +57,6 @@ def _reset_buffers(self): self.terminated[:] = False self.timed_out[:] = False - def compute_reward(self, reward_weights): - """Compute and return a torch tensor of rewards - reward_weights: dict with keys matching reward names, and values - matching weights - """ - # reward = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) - self.reward_buf[:] = 0 - for name, weight in reward_weights.items(): - self.reward_buf += weight * self._eval_reward(name) - return self.reward_buf - - def _eval_reward(self, name): - return eval("self._reward_" + name + "()") - def _check_terminations_and_timeouts(self): """Check if environments need to be reset""" contact_forces = self.contact_forces[:, self.termination_contact_indices, :] diff --git a/learning/runners/BaseRunner.py b/learning/runners/BaseRunner.py index 01a1b825..1545293b 100644 --- a/learning/runners/BaseRunner.py +++ b/learning/runners/BaseRunner.py @@ -8,8 +8,8 @@ class BaseRunner: def __init__(self, env, train_cfg, device="cpu"): self.device = device self.env = env + self.setup_reward_functions() self.parse_train_cfg(train_cfg) - self.num_steps_per_env = self.cfg["num_steps_per_env"] self.save_interval = self.cfg["save_interval"] self.num_learning_iterations = self.cfg["max_iterations"] @@ -18,6 +18,13 @@ def __init__(self, env, train_cfg, device="cpu"): self.log_dir = train_cfg["log_dir"] self._set_up_alg() + def setup_reward_functions(self): + self.reward_functions = { + method.replace("_reward_", ""): getattr(self.env, method) + for method in dir(self.env) + if callable(getattr(self.env, method)) and method.startswith("_reward_") + } + def _set_up_alg(self): num_actor_obs = self.get_obs_size(self.actor_cfg["obs"]) num_actions = self.get_action_size(self.actor_cfg["actions"]) @@ -95,8 +102,8 @@ def get_rewards(self, reward_weights, modifier=1, 
mask=None): if mask is None: mask = 1.0 for name, weight in reward_weights.items(): - rewards_dict[name] = mask * self._get_reward({name: weight}, modifier) + rewards_dict[name] = mask * self._get_reward(name, weight * modifier) return rewards_dict - def _get_reward(self, name_weight, modifier=1): - return modifier * self.env.compute_reward(name_weight).to(self.device) + def _get_reward(self, name, weight): + return weight * self.reward_functions[name]().to(self.device) From 7296c6c48903a5458c2e3d4e2c50e72bf430d97a Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Mon, 9 Sep 2024 15:58:57 -0400 Subject: [PATCH 21/26] compute switch once per decimation (speedup ~10%) --- gym/envs/mini_cheetah/mini_cheetah_osc.py | 20 +++++++++++++------ gym/envs/mini_cheetah/mini_cheetah_ref.py | 19 ++++++++++-------- .../mini_cheetah/mini_cheetah_ref_config.py | 1 + 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc.py b/gym/envs/mini_cheetah/mini_cheetah_osc.py index acec0b8a..6d44e35d 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc.py @@ -16,6 +16,9 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def _init_buffers(self): super()._init_buffers() + + self._switch = torch.zeros(self.num_envs, 1, device=self.device) + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) @@ -158,6 +161,7 @@ def _post_decimation_step(self): """Update all states that are not handled in PhysX""" super()._post_decimation_step() self.grf = self._compute_grf() + self._update_cmd_switch() # self._step_oscillators() def _post_physx_step(self): @@ -266,12 +270,16 @@ def _compute_grf(self, grf_norm=True): else: return grf - def _switch(self): + def _update_cmd_switch(self): c_vel = torch.linalg.norm(self.commands, dim=1) - return torch.exp( + self._switch = torch.exp( -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) / self.cfg.reward_settings.switch_scale ) + # return torch.exp( + # -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) + # / self.cfg.reward_settings.switch_scale + # ) def _reward_cursorial(self): # penalize the abad joints being away from 0 @@ -357,10 +365,10 @@ def _reward_stance_velocity(self): return -torch.mean(rew, dim=1) def _reward_dof_vel(self): - return super()._reward_dof_vel() * self._switch() + return super()._reward_dof_vel() * self._switch def _reward_dof_near_home(self): - return super()._reward_dof_near_home() * self._switch() + return super()._reward_dof_near_home() * self._switch def _reward_stand_still(self): """Penalize motion at zero commands""" @@ -371,11 +379,11 @@ def _reward_stand_still(self): rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) - return (rew_vel + rew_pos - rew_base_vel) * self._switch() + return (rew_vel + rew_pos - rew_base_vel) * self._switch def _reward_standing_torques(self): """Penalize torques at zero commands""" - return super()._reward_torques() * self._switch() + return super()._reward_torques() * self._switch # * gait similarity scores def angle_difference(self, theta1, theta2): diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref.py b/gym/envs/mini_cheetah/mini_cheetah_ref.py index d3f8a89b..0084a80c 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref.py +++ 
b/gym/envs/mini_cheetah/mini_cheetah_ref.py @@ -17,6 +17,7 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def _init_buffers(self): super()._init_buffers() + self._switch = torch.zeros(self.num_envs, 1, device=self.device) self.phase = torch.zeros( self.num_envs, 1, dtype=torch.float, device=self.device ) @@ -41,6 +42,7 @@ def _post_decimation_step(self): self.phase_obs = torch.cat( (torch.sin(self.phase), torch.cos(self.phase)), dim=1 ) + self._update_cmd_switch() def _resample_commands(self, env_ids): super()._resample_commands(env_ids) @@ -59,10 +61,11 @@ def _check_terminations_and_timeouts(self): # --- - def _switch(self): + def _update_cmd_switch(self): c_vel = torch.linalg.norm(self.commands, dim=1) - return torch.exp( - -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) / 0.1 + self._switch = torch.exp( + -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) + / self.cfg.reward_settings.switch_scale ) def _reward_swing_grf(self): @@ -73,7 +76,7 @@ def _reward_swing_grf(self): ) ph_off = torch.lt(self.phase, torch.pi) rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) - return -torch.sum(rew.float(), dim=1) * (1 - self._switch()) + return -torch.sum(rew.float(), dim=1) * (1 - self._switch) def _reward_stance_grf(self): """Reward non-zero grf during stance (pi to 2pi)""" @@ -84,7 +87,7 @@ def _reward_stance_grf(self): ph_off = torch.gt(self.phase, torch.pi) # should this be in swing? rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) - return torch.sum(rew.float(), dim=1) * (1 - self._switch()) + return torch.sum(rew.float(), dim=1) * (1 - self._switch) def _reward_reference_traj(self): """REWARDS EACH LEG INDIVIDUALLY BASED ON ITS POSITION IN THE CYCLE""" @@ -93,7 +96,7 @@ def _reward_reference_traj(self): error /= self.scales["dof_pos"] reward = (self._sqrdexp(error) - torch.abs(error) * 0.2).mean(dim=1) # * only when commanded velocity is higher - return reward * (1 - self._switch()) + return reward * (1 - self._switch) def _get_ref(self): leg_frame = torch.zeros_like(self.torques) @@ -121,10 +124,10 @@ def _reward_stand_still(self): rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) - return (rew_vel + rew_pos - rew_base_vel) * self._switch() + return (rew_vel + rew_pos - rew_base_vel) * self._switch def _reward_tracking_lin_vel(self): """Tracking linear velocity commands (xy axes)""" # just use lin_vel? 
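         # note: self._switch is ~1 while the commanded velocity norm is
         # below ~0.1 and decays toward 0 for faster commands (see
         # _update_cmd_switch above), so (1 - self._switch) gates the
         # tracking reward off when the robot is commanded to stand;
         # this patch caches it once per decimation instead of
         # recomputing it inside every reward term.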
reward = super()._reward_tracking_lin_vel() - return reward * (1 - self._switch()) + return reward * (1 - self._switch) diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py index b8a7a2fe..7a54bb1d 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py @@ -60,6 +60,7 @@ class reward_settings(MiniCheetahCfg.reward_settings): max_contact_force = 600.0 base_height_target = 0.3 tracking_sigma = 0.25 + switch_scale = 0.1 class scaling(MiniCheetahCfg.scaling): pass From 69218dd1a7ffcd1c8a898a160f1bb6f17a75e888 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Tue, 10 Sep 2024 17:55:53 -0400 Subject: [PATCH 22/26] fixed logging bug that scaled with wrong dt --- gym/envs/base/fixed_robot.py | 21 ++++-------- gym/envs/base/legged_robot.py | 20 ++++------- gym/envs/base/legged_robot_config.py | 3 -- .../mini_cheetah/mini_cheetah_ref_config.py | 33 ++++++++++++++----- gym/envs/pendulum/pendulum_SAC_config.py | 3 +- gym/envs/pendulum/pendulum_config.py | 1 + learning/runners/my_runner.py | 17 +++++----- learning/runners/off_policy_runner.py | 17 +++++----- learning/runners/on_policy_runner.py | 17 +++++----- 9 files changed, 64 insertions(+), 68 deletions(-) diff --git a/gym/envs/base/fixed_robot.py b/gym/envs/base/fixed_robot.py index 544eba52..f623e5d1 100644 --- a/gym/envs/base/fixed_robot.py +++ b/gym/envs/base/fixed_robot.py @@ -580,27 +580,18 @@ def _reward_dof_vel(self): def _reward_action_rate(self): """Penalize changes in actions""" nact = self.num_actuators - dt2 = (self.dt * self.cfg.control.decimation) ** 2 - error = ( - torch.square( - self.dof_pos_history[:, :nact] - - self.dof_pos_history[:, nact : 2 * nact] - ) - / dt2 + error = torch.square( + self.dof_pos_history[:, :nact] - self.dof_pos_history[:, 2 * nact :] ) return -torch.mean(error, dim=1) def _reward_action_rate2(self): """Penalize changes in actions""" nact = self.num_actuators - dt2 = (self.dt * self.cfg.control.decimation) ** 2 - error = ( - torch.square( - self.dof_pos_history[:, :nact] - - 2 * self.dof_pos_history[:, nact : 2 * nact] - + self.dof_pos_history[:, 2 * nact :] - ) - / dt2 + error = torch.square( + self.dof_pos_history[:, :nact] + - 2 * self.dof_pos_history[:, nact : 2 * nact] + + self.dof_pos_history[:, 2 * nact :] ) return -torch.mean(error, dim=1) diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index 4433d4fb..c80ebc85 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -1030,26 +1030,18 @@ def _reward_dof_vel(self): def _reward_action_rate(self): """Penalize changes in actions""" n = self.num_actuators - dt2 = (self.dt * self.cfg.control.decimation) ** 2 - error = ( - torch.square( - self.dof_pos_history[:, :n] - self.dof_pos_history[:, n : 2 * n] - ) - / dt2 + error = torch.square( + self.dof_pos_history[:, :n] - self.dof_pos_history[:, 2 * n :] ) return -torch.mean(error, dim=1) def _reward_action_rate2(self): """Penalize changes in actions""" n = self.num_actuators - dt2 = (self.dt * self.cfg.control.decimation) ** 2 - error = ( - torch.square( - self.dof_pos_history[:, :n] - - 2 * self.dof_pos_history[:, n : 2 * n] - + self.dof_pos_history[:, 2 * n :] - ) - / dt2 + error = torch.square( + self.dof_pos_history[:, :n] + - 2 * self.dof_pos_history[:, n : 2 * n] + + self.dof_pos_history[:, 2 * n :] ) return -torch.mean(error, dim=1) diff --git a/gym/envs/base/legged_robot_config.py b/gym/envs/base/legged_robot_config.py index 
4336a749..c38f3e33 100644 --- a/gym/envs/base/legged_robot_config.py +++ b/gym/envs/base/legged_robot_config.py @@ -301,9 +301,6 @@ class algorithm: batch_size = 2**15 max_gradient_steps = 24 # new - storage_size = 2**17 # new - batch_size = 2**15 # new - clip_param = 0.2 learning_rate = 1.0e-3 max_grad_norm = 1.0 diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py index 7a54bb1d..d3a301d3 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py @@ -85,7 +85,7 @@ class actor(MiniCheetahRunnerCfg.actor): "dof_vel", "phase_obs", ] - normalize_obs = True + normalize_obs = False actions = ["dof_pos_target"] disable_actions = False @@ -115,7 +115,7 @@ class critic(MiniCheetahRunnerCfg.critic): "phase_obs", "dof_pos_target", ] - normalize_obs = True + normalize_obs = False class reward: class weights: @@ -128,13 +128,13 @@ class weights: dof_vel = 0.0 min_base_height = 1.5 collision = 0.0 - action_rate = 1e-4 - action_rate2 = 1e-5 + action_rate = 1e-5 + action_rate2 = 1e-6 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 dof_near_home = 0.0 - reference_traj = 1.5 + reference_traj = 0.0 swing_grf = 1.5 stance_grf = 1.5 @@ -142,11 +142,28 @@ class termination_weight: termination = 0.15 class algorithm(MiniCheetahRunnerCfg.algorithm): - pass + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2 * 4096 # use all the data + max_gradient_steps = 50 + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-5, 1e-2] + lr_ratio = 1.5 class runner(MiniCheetahRunnerCfg.runner): run_name = "" experiment_name = "mini_cheetah_ref" - max_iterations = 1000 # number of policy updates + max_iterations = 500 # number of policy updates algorithm_class_name = "PPO2" - num_steps_per_env = 32 # deprecate + num_steps_per_env = 20 # deprecate diff --git a/gym/envs/pendulum/pendulum_SAC_config.py b/gym/envs/pendulum/pendulum_SAC_config.py index 836a6185..42bf0265 100644 --- a/gym/envs/pendulum/pendulum_SAC_config.py +++ b/gym/envs/pendulum/pendulum_SAC_config.py @@ -17,7 +17,7 @@ class init_state(PendulumCfg.init_state): dof_vel_range = {"theta": [-5, 5]} class control(PendulumCfg.control): - ctrl_frequency = 10 + ctrl_frequency = 100 desired_sim_frequency = 100 class asset(PendulumCfg.asset): @@ -37,6 +37,7 @@ class PendulumSACRunnerCfg(FixedRobotCfgPPO): runner_class_name = "OffPolicyRunner" class actor(FixedRobotCfgPPO.actor): + frequency = 10 latent_nn = {"hidden_dims": [128, 64], "activation": "elu", "layer_norm": True} mean_nn = {"hidden_dims": [32], "activation": "elu", "layer_norm": True} std_nn = {"hidden_dims": [32], "activation": "elu", "layer_norm": True} diff --git a/gym/envs/pendulum/pendulum_config.py b/gym/envs/pendulum/pendulum_config.py index bb33c1f1..910fe341 100644 --- a/gym/envs/pendulum/pendulum_config.py +++ b/gym/envs/pendulum/pendulum_config.py @@ -62,6 +62,7 @@ class PendulumRunnerCfg(FixedRobotCfgPPO): runner_class_name = "OnPolicyRunner" class actor(FixedRobotCfgPPO.actor): + frequency = 25 hidden_dims = [128, 64, 32] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "tanh" diff --git a/learning/runners/my_runner.py b/learning/runners/my_runner.py index 4c2f5d52..e0da1a1b 100644 --- a/learning/runners/my_runner.py +++ b/learning/runners/my_runner.py @@ -16,12 
+16,6 @@ class MyRunner(OnPolicyRunner): def __init__(self, env, train_cfg, device="cpu"): super().__init__(env, train_cfg, device) - logger.initialize( - self.env.num_envs, - self.env.dt, - self.cfg["max_iterations"], - self.device, - ) def _set_up_alg(self): num_actor_obs = self.get_obs_size(self.actor_cfg["obs"]) @@ -34,10 +28,9 @@ def _set_up_alg(self): self.alg = alg_class(actor, critic, device=self.device, **self.alg_cfg) def learn(self, states_to_log_dict=None): - self.set_up_logger() - n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" + self.set_up_logger(dt=self.env.dt * n_policy_steps) rewards_dict = self.initialize_rewards_dict(n_policy_steps) @@ -164,7 +157,13 @@ def burn_in_normalization(self, n_iterations=100): self.alg.critic.evaluate(critic_obs) self.env.reset() - def set_up_logger(self): + def set_up_logger(self, dt=None): + if dt is None: + dt = self.env.dt + logger.initialize( + self.env.num_envs, dt, self.cfg["max_iterations"], self.device + ) + logger.register_rewards(list(self.critic_cfg["reward"]["weights"].keys())) logger.register_rewards( list(self.critic_cfg["reward"]["termination_weight"].keys()) diff --git a/learning/runners/off_policy_runner.py b/learning/runners/off_policy_runner.py index 516c4a3e..83249aef 100644 --- a/learning/runners/off_policy_runner.py +++ b/learning/runners/off_policy_runner.py @@ -16,12 +16,6 @@ class OffPolicyRunner(BaseRunner): def __init__(self, env, train_cfg, device="cpu"): super().__init__(env, train_cfg, device) - logger.initialize( - self.env.num_envs, - self.env.dt, - self.cfg["max_iterations"], - self.device, - ) def _set_up_alg(self): num_actor_obs = self.get_obs_size(self.actor_cfg["obs"]) @@ -46,10 +40,9 @@ def _set_up_alg(self): ) def learn(self): - self.set_up_logger() - n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" + self.set_up_logger(dt=self.env.dt * n_policy_steps) rewards_dict = self.initialize_rewards_dict(n_policy_steps) @@ -237,7 +230,13 @@ def initialize_rewards_dict(self, n_steps): ) return rewards_dict - def set_up_logger(self): + def set_up_logger(self, dt=None): + if dt is None: + dt = self.env.dt + logger.initialize( + self.env.num_envs, dt, self.cfg["max_iterations"], self.device + ) + logger.register_rewards(list(self.critic_cfg["reward"]["weights"].keys())) logger.register_rewards( list(self.critic_cfg["reward"]["termination_weight"].keys()) diff --git a/learning/runners/on_policy_runner.py b/learning/runners/on_policy_runner.py index 7167f03e..4831b615 100644 --- a/learning/runners/on_policy_runner.py +++ b/learning/runners/on_policy_runner.py @@ -14,18 +14,11 @@ class OnPolicyRunner(BaseRunner): def __init__(self, env, train_cfg, device="cpu"): super().__init__(env, train_cfg, device) - logger.initialize( - self.env.num_envs, - self.env.dt, - self.cfg["max_iterations"], - self.device, - ) def learn(self, states_to_log_dict=None): - self.set_up_logger() - n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" + self.set_up_logger(dt=self.env.dt * n_policy_steps) rewards_dict = self.initialize_rewards_dict(n_policy_steps) @@ -188,7 +181,13 @@ def initialize_rewards_dict(self, n_steps): ) return rewards_dict - def set_up_logger(self): + def set_up_logger(self, dt=None): + if dt is None: + dt = self.env.dt + logger.initialize( + 
self.env.num_envs, dt, self.cfg["max_iterations"], self.device + ) + logger.register_rewards(list(self.critic_cfg["reward"]["weights"].keys())) logger.register_rewards( list(self.critic_cfg["reward"]["termination_weight"].keys()) From b1bd4f2b9b20c2c67877d61e8c4daee513d65b17 Mon Sep 17 00:00:00 2001 From: Steve Heim Date: Wed, 11 Sep 2024 14:37:52 -0400 Subject: [PATCH 23/26] hardcode Jacobian (halves time for _apply_coupling) --- .../mini_cheetah/mini_cheetah_ref_config.py | 4 ++ gym/envs/mit_humanoid/mit_humanoid.py | 28 ++++++------ gym/envs/mit_humanoid/mit_humanoid_config.py | 43 +++++++++---------- learning/modules/QRCritics.py | 2 +- 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py index d3a301d3..37c52919 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py @@ -74,6 +74,7 @@ class actor(MiniCheetahRunnerCfg.actor): frequency = 100 hidden_dims = [256, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + layer_norm = [True, True, False] activation = "elu" smooth_exploration = False exploration_sample_freq = 16 @@ -102,6 +103,9 @@ class noise: class critic(MiniCheetahRunnerCfg.critic): hidden_dims = [256, 256, 128] + layer_norm = [True, True, False] + dropouts = [0.1, 0.0, 0.0] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" obs = [ diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index a580fd57..a2e082e8 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -32,29 +32,28 @@ def _init_sampled_history_buffers(self): self.sampled_history_threshold = int( self.cfg.control.ctrl_frequency / self.cfg.env.sampled_history_frequency ) + self.J = torch.eye(self.num_dof).to(self.device) + self.J[4, 3] = 1 + self.J[9, 8] = 1 + self.J_inv_T = torch.inverse(self.J.T) def _apply_coupling(self, q, qd, q_des, qd_des, kp, kd, tau_ff): # Create a Jacobian matrix and move it to the same device as input tensors - J = torch.eye(q.shape[-1]).to(q.device) - J[4, 3] = 1 - J[9, 8] = 1 # Perform transformations using Jacobian - q = torch.matmul(q, J.T) - qd = torch.matmul(qd, J.T) - q_des = torch.matmul(q_des, J.T) - qd_des = torch.matmul(qd_des, J.T) - - # Inverse of the transpose of Jacobian - J_inv_T = torch.inverse(J.T) + q = torch.matmul(q, self.J.T) + qd = torch.matmul(qd, self.J.T) + q_des = torch.matmul(q_des, self.J.T) + qd_des = torch.matmul(qd_des, self.J.T) # Compute feed-forward torques - tau_ff = torch.matmul(J_inv_T, tau_ff.T).T + tau_ff = torch.matmul(self.J_inv_T, tau_ff.T).T # Compute kp and kd terms kp = torch.diagonal( torch.matmul( - torch.matmul(J_inv_T, torch.diag_embed(kp, dim1=-2, dim2=-1)), J_inv_T.T + torch.matmul(self.J_inv_T, torch.diag_embed(kp, dim1=-2, dim2=-1)), + self.J_inv_T.T, ), dim1=-2, dim2=-1, @@ -62,7 +61,8 @@ def _apply_coupling(self, q, qd, q_des, qd_des, kp, kd, tau_ff): kd = torch.diagonal( torch.matmul( - torch.matmul(J_inv_T, torch.diag_embed(kd, dim1=-2, dim2=-1)), J_inv_T.T + torch.matmul(self.J_inv_T, torch.diag_embed(kd, dim1=-2, dim2=-1)), + self.J_inv_T.T, ), dim1=-2, dim2=-1, @@ -70,7 +70,7 @@ def _apply_coupling(self, q, qd, q_des, qd_des, kp, kd, tau_ff): # Compute torques torques = kp * (q_des - q) + kd * (qd_des - qd) + tau_ff - torques = torch.matmul(torques, J) + torques = torch.matmul(torques, self.J) return torques diff --git 
a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index dc02a211..861f2826 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -112,7 +112,7 @@ class control(LeggedRobotCfg.control): } # [N*m*s/rad] ctrl_frequency = 100 - desired_sim_frequency = 1000 + desired_sim_frequency = 500 filter_gain = 0.1586 # 1: no filtering, 0: wall @@ -123,7 +123,7 @@ class commands: resampling_time = 10.0 # time before command are changed[s] class ranges: - lin_vel_x = [-0.0, 0.0] # min max [m/s] [-0.75, 0.75] + lin_vel_x = [-0.0, 4.0] # min max [m/s] [-0.75, 0.75] lin_vel_y = 0.0 # max [m/s] yaw_vel = 0.0 # max [rad/s] @@ -227,10 +227,12 @@ class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): runner_class_name = "OnPolicyRunner" class actor(LeggedRobotRunnerCfg.actor): + frequency = 100 init_noise_std = 1.0 hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" + layer_norm = [True, True, False] smooth_exploration = False obs = [ @@ -238,16 +240,15 @@ class actor(LeggedRobotRunnerCfg.actor): "base_lin_vel", "base_ang_vel", "projected_gravity", - # "commands", + "commands", "dof_pos_obs", "dof_vel", - "dof_pos_history", - # "sampled_history_dof_pos", - # "sampled_history_dof_vel", - # "sampled_history_dof_pos_target", - "oscillator_obs", + # "dof_pos_history", + "sampled_history_dof_pos", + "sampled_history_dof_vel", + "sampled_history_dof_pos_target", ] - normalize_obs = True + normalize_obs = False actions = ["dof_pos_target"] disable_actions = False @@ -264,26 +265,28 @@ class critic(LeggedRobotRunnerCfg.critic): hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" + layer_norm = [True, True, False] obs = [ "base_height", "base_lin_vel", "base_ang_vel", "projected_gravity", - "commands", "dof_pos_obs", "dof_vel", - "dof_pos_history", + "sampled_history_dof_pos", + "sampled_history_dof_vel", + "sampled_history_dof_pos_target", ] - normalize_obs = True + normalize_obs = False class reward: class weights: - # tracking_lin_vel = 0.0 - # tracking_ang_vel = 1.5 + tracking_lin_vel = 4.0 + tracking_ang_vel = 0.5 # orientation = 1.0 torques = 5.0e-4 - # min_base_height = 1.5 + min_base_height = 1.5 action_rate = 1e-3 action_rate2 = 1e-3 lin_vel_z = 0.0 @@ -292,8 +295,6 @@ class weights: # stand_still = 0.25 dof_pos_limits = 0.25 dof_near_home = 0.25 - stance = 1.0 - swing = 1.0 hips_forward = 0.0 walk_freq = 0.0 # 2.5 @@ -305,12 +306,8 @@ class algorithm(LeggedRobotRunnerCfg.algorithm): gamma = 0.99 lam = 0.95 # shared - batch_size = 2**15 - max_gradient_steps = 24 - # new - storage_size = 2**17 # new - batch_size = 2**15 # new - + batch_size = 4096 + max_gradient_steps = 48 clip_param = 0.2 learning_rate = 1.0e-3 max_grad_norm = 1.0 diff --git a/learning/modules/QRCritics.py b/learning/modules/QRCritics.py index d2ed9378..9dd51489 100644 --- a/learning/modules/QRCritics.py +++ b/learning/modules/QRCritics.py @@ -19,7 +19,7 @@ def init_weights(m): m.bias.data.fill_(0.01) -class Critic(nn.Module): +class Critic2(nn.Module): def __init__( self, num_obs, From 2a7d1a1c099ba47e67ccfd7c693a566a82d54b88 Mon Sep 17 00:00:00 2001 From: sheim Date: Wed, 10 Dec 2025 16:56:22 -0500 Subject: [PATCH 24/26] ruff --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 075e1dbf..14539b8c 100644 --- a/.pre-commit-config.yaml +++ 
From 2a7d1a1c099ba47e67ccfd7c693a566a82d54b88 Mon Sep 17 00:00:00 2001
From: sheim
Date: Wed, 10 Dec 2025 16:56:22 -0500
Subject: [PATCH 24/26] ruff

---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 075e1dbf..14539b8c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,11 +1,11 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.3
+    rev: v0.14.2
    hooks:
      - id: ruff-format

  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.3
+    rev: v0.14.2
    hooks:
      - id: ruff
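Note: bumping the hooks from v0.1.3 to v0.14.2 changes how ruff-format lays out long assert statements, and that is all the next commit's mechanical reformat applies across the logger and test files: the condition stays on one line and the message is parenthesized instead. Both layouts, shown schematically on a short runnable example, are equivalent at runtime:

    value, limit = 3, 5

    # Schematic of the old layout: the condition is wrapped in parentheses.
    assert (
        value < limit
    ), "value exceeded limit"

    # Schematic of the new layout: the message is wrapped instead.
    assert value < limit, (
        "value exceeded limit"
    )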
From 8b9fcb137d9342acfd167f6451336b86ed01f3ad Mon Sep 17 00:00:00 2001
From: sheim
Date: Wed, 10 Dec 2025 16:56:50 -0500
Subject: [PATCH 25/26] ruff

---
 learning/utils/logger/PerIterationLogs.py | 6 ++--
 learning/utils/logger/test_logger.py | 36 +++++++++----------
 .../test_runner_integration.py | 18 +++++-----
 .../test_generated_torch_files.py | 6 ++--
 4 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/learning/utils/logger/PerIterationLogs.py b/learning/utils/logger/PerIterationLogs.py
index 0f30b899..5d781efb 100644
--- a/learning/utils/logger/PerIterationLogs.py
+++ b/learning/utils/logger/PerIterationLogs.py
@@ -6,9 +6,9 @@ def __init__(self):

     def register_items(self, category, target, attribute_list):
         if category in self.targets.keys():
-            assert (
-                self.targets[category] == target
-            ), "Category already registered with different target"
+            assert self.targets[category] == target, (
+                "Category already registered with different target"
+            )
         else:
             self.targets[category] = target
             self.logs[category] = {}
diff --git a/learning/utils/logger/test_logger.py b/learning/utils/logger/test_logger.py
index eaef76f6..941a4f0b 100644
--- a/learning/utils/logger/test_logger.py
+++ b/learning/utils/logger/test_logger.py
@@ -10,9 +10,9 @@ def __init__(self):

 def all_rewards_registered(logger, reward_names):
     for key in reward_names:
-        assert (
-            key in logger.reward_logs.log_items.keys()
-        ), "key not registered in logger."
+        assert key in logger.reward_logs.log_items.keys(), (
+            "key not registered in logger."
+        )


 def only_rewards_registered(logger, reward_names):
@@ -27,13 +27,13 @@ def only_category_registered(logger, categories):

 def both_target_and_log_set_up(logger):
     for key in logger.iteration_logs.logs.keys():
-        assert (
-            key in logger.iteration_logs.targets.keys()
-        ), "target not registered in PerIteration."
+        assert key in logger.iteration_logs.targets.keys(), (
+            "target not registered in PerIteration."
+        )
     for key in logger.iteration_logs.targets.keys():
-        assert (
-            key in logger.iteration_logs.logs.keys()
-        ), "log not registered in PerIteration."
+        assert key in logger.iteration_logs.logs.keys(), (
+            "log not registered in PerIteration."
+        )


 def test_logger_setup():
@@ -71,16 +71,16 @@ def check_episode_count(logger, expected_count=1):

 def check_average_time(logger, expected_time):
     avg_time = logger.reward_logs.get_average_time()
-    assert (
-        abs(avg_time.item() - expected_time) < 1e-5
-    ), f"Average time {avg_time} is not close to {expected_time}"
+    assert abs(avg_time.item() - expected_time) < 1e-5, (
+        f"Average time {avg_time} is not close to {expected_time}"
+    )


 def check_average_reward(logger, reward_name, expected_average):
     avg_reward = logger.reward_logs.get_average_rewards()[reward_name]
-    assert (
-        abs(avg_reward.item() - expected_average) < 1e-5
-    ), f"Average reward {avg_reward} is not close to {expected_average}"
+    assert abs(avg_reward.item() - expected_average) < 1e-5, (
+        f"Average reward {avg_reward} is not close to {expected_average}"
+    )


 def test_logging_rewards():
@@ -173,8 +173,8 @@ def test_timer():
     ETA2 = logger.estimate_ETA(["first_step"], mode="total")
     expected_ETA2 = a * (1000 - 1)
-    assert (
-        abs(ETA2 - expected_ETA2) < 1e-5
-    ), f"ETA {ETA2} is not close to {expected_ETA2}"
+    assert abs(ETA2 - expected_ETA2) < 1e-5, (
+        f"ETA {ETA2} is not close to {expected_ETA2}"
+    )

     assert (a + b) >= 2 * trial_time, "Timer not working correctly."
diff --git a/tests/integration_tests/test_runner_integration.py b/tests/integration_tests/test_runner_integration.py
index 611108e5..d847f627 100644
--- a/tests/integration_tests/test_runner_integration.py
+++ b/tests/integration_tests/test_runner_integration.py
@@ -58,9 +58,9 @@ def test_default_integration_settings(self, args):
         with torch.no_grad():
             actions = runner.get_inference_actions()
             deployed_actions = runner.env.get_states(runner.actor_cfg["actions"])
-        assert (
-            torch.equal(actions, torch.zeros_like(actions)) is False
-        ), "Policy returning all zeros"
+        assert torch.equal(actions, torch.zeros_like(actions)) is False, (
+            "Policy returning all zeros"
+        )
         assert (
             torch.equal(deployed_actions, torch.zeros_like(deployed_actions)) is False
         ), "Actions not written to environment"
@@ -78,9 +78,9 @@ def test_default_integration_settings(self, args):
         model_7_path = os.path.join(runner.log_dir, "model_7.pt")
         model_8_path = os.path.join(runner.log_dir, "model_8.pt")

-        assert os.path.exists(
-            model_0_path
-        ), f"{model_0_path} (pre-iteration) was not saved"
+        assert os.path.exists(model_0_path), (
+            f"{model_0_path} (pre-iteration) was not saved"
+        )
         assert not os.path.exists(model_1_path), f"{model_1_path} was saved"
         assert not os.path.exists(model_2_path), f"{model_2_path} was saved"
         assert os.path.exists(model_3_path), f"{model_3_path} was not saved"
         assert not os.path.exists(model_4_path), f"{model_4_path} was saved"
         assert not os.path.exists(model_5_path), f"{model_5_path} was saved"
         assert os.path.exists(model_6_path), f"{model_6_path} was not saved"
         assert not os.path.exists(model_7_path), f"{model_7_path} was saved"
-        assert os.path.exists(
-            model_8_path
-        ), f"{model_5_path}(last iteration) was not saved"
+        assert os.path.exists(model_8_path), (
+            f"{model_5_path}(last iteration) was not saved"
+        )

         obs = torch.randn_like(runner.get_obs(runner.actor_cfg["obs"]))
         actions_first = runner.alg.actor.act_inference(obs).cpu().clone()
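Note: the save/not-saved assertions above (models 0, 3, 6, and the final 8 exist; 1, 2, 4, 5, and 7 do not) imply a checkpoint rule of "every few iterations, plus always the last one". A sketch of that rule, with the interval inferred from the test rather than read from any config (should_save is a hypothetical helper, not the runner's API):

    def should_save(iteration: int, save_interval: int, max_iterations: int) -> bool:
        # Checkpoint on the interval and always on the final iteration.
        return iteration % save_interval == 0 or iteration == max_iterations

    # Reproduces the pattern asserted in the integration test: with 8 iterations and
    # an interval of 3, models 0, 3, 6, and 8 are written and the rest are skipped.
    saved = [it for it in range(9) if should_save(it, save_interval=3, max_iterations=8)]
    assert saved == [0, 3, 6, 8]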
diff --git a/tests/regression_tests/test_generated_torch_files.py b/tests/regression_tests/test_generated_torch_files.py
index cf640ed3..55626034 100644
--- a/tests/regression_tests/test_generated_torch_files.py
+++ b/tests/regression_tests/test_generated_torch_files.py
@@ -27,6 +27,6 @@ def test_generated_tensor_matches_reference(tmp_path):
     _generate_candidate_tensor(candidate_path)
     reference = _load_tensor(REFERENCE_FILE)
     candidate = _load_tensor(candidate_path)
-    assert torch.equal(
-        reference, candidate
-    ), f"Tensors in {REFERENCE_FILE} and {candidate_path} differ."
+    assert torch.equal(reference, candidate), (
+        f"Tensors in {REFERENCE_FILE} and {candidate_path} differ."
+    )

From 18343284aed6a8370628680a1c167ab7747c0f4f Mon Sep 17 00:00:00 2001
From: sheim
Date: Wed, 10 Dec 2025 18:05:50 -0500
Subject: [PATCH 26/26] catch sample history bug

---
 gym/envs/mit_humanoid/mit_humanoid.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py
index a2e082e8..cb4db70f 100644
--- a/gym/envs/mit_humanoid/mit_humanoid.py
+++ b/gym/envs/mit_humanoid/mit_humanoid.py
@@ -84,8 +84,8 @@ def _reset_system(self, env_ids):
     def _reset_sampled_history_buffers(self, ids):
         n = self.cfg.env.sampled_history_length
         self.sampled_history_dof_pos_target[ids] = self.dof_pos_target[ids].tile(n)
-        self.sampled_history_dof_pos[ids] = self.dof_pos_target[ids].tile(n)
-        self.sampled_history_dof_vel[ids] = self.dof_pos_target[ids].tile(n)
+        self.sampled_history_dof_pos[ids] = self.dof_pos[ids].tile(n)
+        self.sampled_history_dof_vel[ids] = self.dof_vel[ids].tile(n)

     # compute_torques accounting for coupling, and filtering torques
     def _compute_torques(self):
@@ -121,11 +121,11 @@ def _update_sampled_history_buffers(self):
         self.sampled_history_dof_pos[ids] = torch.roll(
             self.sampled_history_dof_pos[ids], self.num_dof, dims=1
         )  # check
-        self.sampled_history_dof_pos[ids, : self.num_dof] = self.dof_pos_target[ids]
+        self.sampled_history_dof_pos[ids, : self.num_dof] = self.dof_pos[ids]
         self.sampled_history_dof_vel[ids] = torch.roll(
             self.sampled_history_dof_vel[ids], self.num_dof, dims=1
         )  # check
-        self.sampled_history_dof_vel[ids, : self.num_dof] = self.dof_pos_target[ids]
+        self.sampled_history_dof_vel[ids, : self.num_dof] = self.dof_vel[ids]

         self.sampled_history_counter[ids] = 0
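Note: PATCH 26 fixes a copy-paste bug, where both sampled-history buffers were reset and updated from dof_pos_target instead of the measured dof_pos and dof_vel. A minimal self-contained check of the roll-and-insert update the fix restores (buffer sizes are illustrative):

    import torch

    num_envs, num_dof, history_len = 2, 3, 4
    dof_pos = torch.arange(num_envs * num_dof, dtype=torch.float32).reshape(num_envs, num_dof)
    history = torch.zeros(num_envs, num_dof * history_len)

    # Shift the stored samples back by one slot, then write the measured state
    # (not the target, which was the bug) into the newest num_dof slots.
    history = torch.roll(history, num_dof, dims=1)
    history[:, :num_dof] = dof_pos

    assert torch.equal(history[:, :num_dof], dof_pos)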