diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 075e1dbf..14539b8c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,11 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 + rev: v0.14.2 hooks: - id: ruff-format - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 + rev: v0.14.2 hooks: - id: ruff diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index 0aa7ce31..826b25e6 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -20,6 +20,7 @@ "A1": ".a1.a1", "HumanoidRunning": ".mit_humanoid.humanoid_running", "Pendulum": ".pendulum.pendulum", + "Lander": ".mit_humanoid.lander", } config_dict = { @@ -34,6 +35,7 @@ "HumanoidRunningCfg": ".mit_humanoid.humanoid_running_config", "PendulumCfg": ".pendulum.pendulum_config", "PendulumSACCfg": ".pendulum.pendulum_SAC_config", + "LanderCfg": ".mit_humanoid.lander_config", "PendulumPSDCfg": ".pendulum.pendulum_PSD_config", } @@ -49,6 +51,7 @@ "HumanoidRunningRunnerCfg": ".mit_humanoid.humanoid_running_config", "PendulumRunnerCfg": ".pendulum.pendulum_config", "PendulumSACRunnerCfg": ".pendulum.pendulum_SAC_config", + "LanderRunnerCfg": ".mit_humanoid.lander_config", "PendulumPSDRunnerCfg": ".pendulum.pendulum_PSD_config", } @@ -79,6 +82,7 @@ "flat_anymal_c": ["Anymal", "AnymalCFlatCfg", "AnymalCFlatRunnerCfg"], "pendulum": ["Pendulum", "PendulumCfg", "PendulumRunnerCfg"], "sac_pendulum": ["Pendulum", "PendulumSACCfg", "PendulumSACRunnerCfg"], + "lander": ["Lander", "LanderCfg", "LanderRunnerCfg"], "psd_pendulum": ["Pendulum", "PendulumPSDCfg", "PendulumPSDRunnerCfg"], } diff --git a/gym/envs/base/base_task.py b/gym/envs/base/base_task.py index 551143a3..033e0eeb 100644 --- a/gym/envs/base/base_task.py +++ b/gym/envs/base/base_task.py @@ -18,33 +18,20 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): # * env device is GPU only if sim is on GPU and use_gpu_pipeline=True, # * otherwise returned tensors are copied to CPU by physX. 
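(Aside, not part of the patch: the base_task.py hunk here and the task_skeleton.py hunk further below move the per-env episode buffers out of BaseTask and into TaskSkeleton, which BaseTask now reaches through super().__init__(). A condensed, standalone sketch of the resulting ownership, with the device selection simplified:)

import torch

class TaskSkeleton:
    # now owns the per-env episode bookkeeping buffers
    def __init__(self, num_envs=1, device="cpu"):
        self.num_envs = num_envs
        self.device = device
        self.to_be_reset = torch.ones(num_envs, device=device, dtype=torch.bool)
        self.terminated = torch.ones(num_envs, device=device, dtype=torch.bool)
        self.episode_length_buf = torch.zeros(num_envs, device=device, dtype=torch.long)
        self.timed_out = torch.zeros(num_envs, device=device, dtype=torch.bool)

class BaseTaskSketch(TaskSkeleton):
    # BaseTask.__init__ now only picks the device and delegates allocation upward
    def __init__(self, cfg, sim_device, use_gpu_pipeline):
        device = sim_device if use_gpu_pipeline else "cpu"
        super().__init__(num_envs=cfg.env.num_envs, device=device)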
if sim_device_type == "cuda" and sim_params.use_gpu_pipeline: - self.device = self.sim_device + device = self.sim_device else: - self.device = "cpu" + device = "cpu" # * graphics device for rendering, -1 for no rendering self.graphics_device_id = self.sim_device_id - self.num_envs = cfg.env.num_envs self.num_actuators = cfg.env.num_actuators # * optimization flags for pytorch JIT torch._C._jit_set_profiling_mode(False) torch._C._jit_set_profiling_executor(False) - # allocate buffers - self.to_be_reset = torch.ones( - self.num_envs, device=self.device, dtype=torch.bool - ) - self.terminated = torch.ones( - self.num_envs, device=self.device, dtype=torch.bool - ) - self.episode_length_buf = torch.zeros( - self.num_envs, device=self.device, dtype=torch.long - ) - self.timed_out = torch.zeros( - self.num_envs, device=self.device, dtype=torch.bool - ) + super().__init__(num_envs=cfg.env.num_envs, device=device) # todo: read from config self.enable_viewer_sync = True diff --git a/gym/envs/base/fixed_robot.py b/gym/envs/base/fixed_robot.py index 544eba52..f623e5d1 100644 --- a/gym/envs/base/fixed_robot.py +++ b/gym/envs/base/fixed_robot.py @@ -580,27 +580,18 @@ def _reward_dof_vel(self): def _reward_action_rate(self): """Penalize changes in actions""" nact = self.num_actuators - dt2 = (self.dt * self.cfg.control.decimation) ** 2 - error = ( - torch.square( - self.dof_pos_history[:, :nact] - - self.dof_pos_history[:, nact : 2 * nact] - ) - / dt2 + error = torch.square( + self.dof_pos_history[:, :nact] - self.dof_pos_history[:, 2 * nact :] ) return -torch.mean(error, dim=1) def _reward_action_rate2(self): """Penalize changes in actions""" nact = self.num_actuators - dt2 = (self.dt * self.cfg.control.decimation) ** 2 - error = ( - torch.square( - self.dof_pos_history[:, :nact] - - 2 * self.dof_pos_history[:, nact : 2 * nact] - + self.dof_pos_history[:, 2 * nact :] - ) - / dt2 + error = torch.square( + self.dof_pos_history[:, :nact] + - 2 * self.dof_pos_history[:, nact : 2 * nact] + + self.dof_pos_history[:, 2 * nact :] ) return -torch.mean(error, dim=1) diff --git a/gym/envs/base/legged_robot.py b/gym/envs/base/legged_robot.py index d092dd5e..c80ebc85 100644 --- a/gym/envs/base/legged_robot.py +++ b/gym/envs/base/legged_robot.py @@ -110,12 +110,12 @@ def _post_decimation_step(self): self.base_height = self.root_states[:, 2:3] - n = self.num_actuators - self.dof_pos_history[:, 2 * n :] = self.dof_pos_history[:, n : 2 * n] - self.dof_pos_history[:, n : 2 * n] = self.dof_pos_history[:, :n] - self.dof_pos_history[:, :n] = self.dof_pos_target self.dof_pos_obs = self.dof_pos - self.default_dof_pos + self.dof_pos_history = self.dof_pos_history.roll(self.num_actuators) + # self.dof_pos_history[:, : self.num_actuators] = self.dof_pos_obs + self.dof_pos_history[:, : self.num_actuators] = self.dof_pos_target + env_ids = ( self.episode_length_buf % int(self.cfg.commands.resampling_time / self.dt) == 0 @@ -133,7 +133,10 @@ def _reset_idx(self, env_ids): self._reset_system(env_ids) self._resample_commands(env_ids) # * reset buffers - self.dof_pos_history[env_ids] = 0.0 + self.dof_pos_obs[env_ids] = self.dof_pos[env_ids] - self.default_dof_pos + # self.dof_pos_history[env_ids] = self.dof_pos_obs[env_ids].tile(3) + self.dof_pos_target[env_ids] = self.default_dof_pos + self.dof_pos_history[env_ids] = self.dof_pos_target[env_ids].tile(3) self.episode_length_buf[env_ids] = 0 def _initialize_sim(self): @@ -481,41 +484,26 @@ def _init_buffers(self): get_axis_params(-1.0, self.up_axis_idx), device=self.device 
).repeat((self.num_envs, 1)) self.torques = torch.zeros( - self.num_envs, - self.num_actuators, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.p_gains = torch.zeros( - self.num_actuators, dtype=torch.float, device=self.device + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.d_gains = torch.zeros( - self.num_actuators, dtype=torch.float, device=self.device + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.dof_pos_target = torch.zeros( - self.num_envs, - self.num_actuators, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.dof_vel_target = torch.zeros( - self.num_envs, - self.num_actuators, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.tau_ff = torch.zeros( - self.num_envs, - self.num_actuators, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators, dtype=torch.float, device=self.device ) self.dof_pos_history = torch.zeros( - self.num_envs, - self.num_actuators * 3, - dtype=torch.float, - device=self.device, + self.num_envs, self.num_actuators * 3, dtype=torch.float, device=self.device ) self.commands = torch.zeros( self.num_envs, 3, dtype=torch.float, device=self.device @@ -532,6 +520,29 @@ def _init_buffers(self): self.num_envs, 1, dtype=torch.float, device=self.device ) + # # * get the body_name to body_index dict + # body_dict = self.gym.get_actor_rigid_body_dict( + # self.envs[0], self.actor_handles[0] + # ) + # # * extract a list of body_names where the index is the id number + # body_names = [ + # body_tuple[0] + # for body_tuple in sorted( + # body_dict.items(), key=lambda body_tuple: body_tuple[1] + # ) + # ] + # # * construct a list of id numbers corresponding to end_effectors + # self.end_effector_ids = [] + # for end_effector_name in self.cfg.asset.foot_collisionbox_names: + # self.end_effector_ids.extend( + # [ + # body_names.index(body_name) + # for body_name in body_names + # if end_effector_name in body_name + # ] + # ) + # # ---------------------------------------- + if self.cfg.terrain.measure_heights: self.height_points = self._init_height_points() self.measured_heights = 0 @@ -559,8 +570,8 @@ def _init_buffers(self): found = False for dof_name in self.cfg.control.stiffness.keys(): if dof_name in name: - self.p_gains[i] = self.cfg.control.stiffness[dof_name] - self.d_gains[i] = self.cfg.control.damping[dof_name] + self.p_gains[:, i] = self.cfg.control.stiffness[dof_name] + self.d_gains[:, i] = self.cfg.control.damping[dof_name] found = True if not found: self.p_gains[i] = 0.0 @@ -995,11 +1006,11 @@ def _reward_lin_vel_z(self): def _reward_ang_vel_xy(self): """Penalize xy axes base angular velocity""" - return -torch.sum(torch.square(self.base_ang_vel[:, :2]), dim=1) + return -torch.mean(torch.square(self.base_ang_vel[:, :2]), dim=1) def _reward_orientation(self): """Penalize non flat base orientation""" - return -torch.sum(torch.square(self.projected_gravity[:, :2]), dim=1) + return -torch.mean(torch.square(self.projected_gravity[:, :2]), dim=1) def _reward_base_height(self): """Penalize base height away from target""" @@ -1019,26 +1030,18 @@ def _reward_dof_vel(self): def _reward_action_rate(self): """Penalize changes in actions""" n = self.num_actuators - dt2 = (self.dt * self.cfg.control.decimation) ** 2 - error = ( - torch.square( - 
self.dof_pos_history[:, :n] - self.dof_pos_history[:, n : 2 * n] - ) - / dt2 + error = torch.square( + self.dof_pos_history[:, :n] - self.dof_pos_history[:, 2 * n :] ) return -torch.mean(error, dim=1) def _reward_action_rate2(self): """Penalize changes in actions""" n = self.num_actuators - dt2 = (self.dt * self.cfg.control.decimation) ** 2 - error = ( - torch.square( - self.dof_pos_history[:, :n] - - 2 * self.dof_pos_history[:, n : 2 * n] - + self.dof_pos_history[:, 2 * n :] - ) - / dt2 + error = torch.square( + self.dof_pos_history[:, :n] + - 2 * self.dof_pos_history[:, n : 2 * n] + + self.dof_pos_history[:, 2 * n :] ) return -torch.mean(error, dim=1) @@ -1084,7 +1087,7 @@ def _reward_tracking_lin_vel(self): """Tracking of linear velocity commands (xy axes)""" error = torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]) error = torch.exp(-error / self.cfg.reward_settings.tracking_sigma) - return torch.sum(error, dim=1) + return torch.mean(error, dim=1) def _reward_tracking_ang_vel(self): """Tracking of angular velocity commands (yaw)""" @@ -1093,7 +1096,7 @@ def _reward_tracking_ang_vel(self): def _reward_feet_contact_forces(self): """penalize high contact forces""" - return -torch.sum( + return -torch.mean( ( torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.reward_settings.max_contact_force diff --git a/gym/envs/base/legged_robot_config.py b/gym/envs/base/legged_robot_config.py index 0b9cf7a4..c38f3e33 100644 --- a/gym/envs/base/legged_robot_config.py +++ b/gym/envs/base/legged_robot_config.py @@ -163,7 +163,7 @@ class asset: file = "" # * name of the feet bodies, # * used to index body state and contact force tensors - foot_name = "None" + foot_name = "foot" penalize_contacts_on = [] terminate_after_contacts_on = [] end_effector_names = [] @@ -301,9 +301,6 @@ class algorithm: batch_size = 2**15 max_gradient_steps = 24 # new - storage_size = 2**17 # new - batch_size = 2**15 # new - clip_param = 0.2 learning_rate = 1.0e-3 max_grad_norm = 1.0 diff --git a/gym/envs/base/task_skeleton.py b/gym/envs/base/task_skeleton.py index 0974970a..4dfcb280 100644 --- a/gym/envs/base/task_skeleton.py +++ b/gym/envs/base/task_skeleton.py @@ -5,10 +5,15 @@ class TaskSkeleton: - def __init__(self, num_envs=1, max_episode_length=1.0, device="cpu"): + def __init__(self, num_envs=1, device="cpu"): self.num_envs = num_envs - self.max_episode_length = max_episode_length self.device = device + + self.to_be_reset = torch.ones(num_envs, device=device, dtype=torch.bool) + self.terminated = torch.ones(num_envs, device=device, dtype=torch.bool) + self.episode_length_buf = torch.zeros(num_envs, device=device, dtype=torch.long) + self.timed_out = torch.zeros(num_envs, device=device, dtype=torch.bool) + return None def get_states(self, obs_list): @@ -52,25 +57,12 @@ def _reset_buffers(self): self.terminated[:] = False self.timed_out[:] = False - def compute_reward(self, reward_weights): - """Compute and return a torch tensor of rewards - reward_weights: dict with keys matching reward names, and values - matching weights - """ - reward = torch.zeros(self.num_envs, device=self.device, dtype=torch.float) - for name, weight in reward_weights.items(): - reward += weight * self._eval_reward(name) - return reward - - def _eval_reward(self, name): - return eval("self._reward_" + name + "()") - def _check_terminations_and_timeouts(self): """Check if environments need to be reset""" contact_forces = self.contact_forces[:, self.termination_contact_indices, :] self.terminated |= 
torch.any(torch.norm(contact_forces, dim=-1) > 1.0, dim=1) self.timed_out = self.episode_length_buf >= self.max_episode_length - self.to_be_reset = self.timed_out | self.terminated + # self.to_be_reset = self.timed_out | self.terminated def step(self, actions): raise NotImplementedError diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc.py b/gym/envs/mini_cheetah/mini_cheetah_osc.py index 77866ca3..f8721459 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc.py @@ -16,6 +16,9 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def _init_buffers(self): super()._init_buffers() + + self._switch = torch.zeros(self.num_envs, 1, device=self.device) + self.oscillators = torch.zeros(self.num_envs, 4, device=self.device) self.oscillator_obs = torch.zeros(self.num_envs, 8, device=self.device) @@ -158,6 +161,7 @@ def _post_decimation_step(self): """Update all states that are not handled in PhysX""" super()._post_decimation_step() self.grf = self._compute_grf() + self._update_cmd_switch() # self._step_oscillators() def _post_physx_step(self): @@ -281,12 +285,16 @@ def _compute_grf(self, grf_norm=True): else: return grf - def _switch(self): + def _update_cmd_switch(self): c_vel = torch.linalg.norm(self.commands, dim=1) - return torch.exp( + self._switch = torch.exp( -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) / self.cfg.reward_settings.switch_scale ) + # return torch.exp( + # -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) + # / self.cfg.reward_settings.switch_scale + # ) def _reward_ang_vel_xy(self): """Penalize xy axes base angular velocity""" @@ -345,10 +353,10 @@ def _reward_coupled_grf(self): return prod - torch.ones_like(prod) def _reward_dof_vel(self): - return super()._reward_dof_vel() * self._switch() + return super()._reward_dof_vel() * self._switch def _reward_dof_near_home(self): - return super()._reward_dof_near_home() * self._switch() + return super()._reward_dof_near_home() * self._switch def _reward_stand_still(self): """Penalize motion at zero commands""" @@ -359,11 +367,11 @@ def _reward_stand_still(self): rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) - return (rew_vel + rew_pos - rew_base_vel) * self._switch() + return (rew_vel + rew_pos - rew_base_vel) * self._switch def _reward_standing_torques(self): """Penalize torques at zero commands""" - return super()._reward_torques() * self._switch() + return super()._reward_torques() * self._switch # * gait similarity scores def angle_difference(self, theta1, theta2): diff --git a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py index 2da73256..99ea1dc1 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_osc_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_osc_config.py @@ -171,6 +171,7 @@ class MiniCheetahOscRunnerCfg(MiniCheetahRunnerCfg): runner_class_name = "OnPolicyRunner" class actor(MiniCheetahRunnerCfg.actor): + frequency = 100 hidden_dims = [256, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" @@ -244,8 +245,8 @@ class weights: dof_vel = 0.0 min_base_height = 1.0 collision = 0 - action_rate = 0.1 # -0.01 - action_rate2 = 0.01 # -0.001 + action_rate = 10 # -0.01 + action_rate2 = 1 # -0.001 stand_still = 0.0 dof_pos_limits = 0.0 feet_contact_forces = 0.0 diff --git 
a/gym/envs/mini_cheetah/mini_cheetah_ref.py b/gym/envs/mini_cheetah/mini_cheetah_ref.py index 57e1f0e7..0084a80c 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref.py @@ -17,6 +17,7 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def _init_buffers(self): super()._init_buffers() + self._switch = torch.zeros(self.num_envs, 1, device=self.device) self.phase = torch.zeros( self.num_envs, 1, dtype=torch.float, device=self.device ) @@ -41,6 +42,7 @@ def _post_decimation_step(self): self.phase_obs = torch.cat( (torch.sin(self.phase), torch.cos(self.phase)), dim=1 ) + self._update_cmd_switch() def _resample_commands(self, env_ids): super()._resample_commands(env_ids) @@ -50,10 +52,20 @@ def _resample_commands(self, env_ids): ).squeeze(1) self.commands[env_ids, :3] *= (rand_ids < 0.9).unsqueeze(1) - def _switch(self): + def _check_terminations_and_timeouts(self): + """Check if environments need to be reset""" + contact_forces = self.contact_forces[:, self.termination_contact_indices, :] + self.terminated |= torch.any(torch.norm(contact_forces, dim=-1) > 1.0, dim=1) + self.timed_out = self.episode_length_buf >= self.max_episode_length + # self.to_be_reset = self.timed_out | self.terminated + + # --- + + def _update_cmd_switch(self): c_vel = torch.linalg.norm(self.commands, dim=1) - return torch.exp( - -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) / 0.1 + self._switch = torch.exp( + -torch.square(torch.max(torch.zeros_like(c_vel), c_vel - 0.1)) + / self.cfg.reward_settings.switch_scale ) def _reward_swing_grf(self): @@ -64,7 +76,7 @@ def _reward_swing_grf(self): ) ph_off = torch.lt(self.phase, torch.pi) rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) - return -torch.sum(rew.float(), dim=1) * (1 - self._switch()) + return -torch.sum(rew.float(), dim=1) * (1 - self._switch) def _reward_stance_grf(self): """Reward non-zero grf during stance (pi to 2pi)""" @@ -75,7 +87,7 @@ def _reward_stance_grf(self): ph_off = torch.gt(self.phase, torch.pi) # should this be in swing? rew = in_contact * torch.cat((ph_off, ~ph_off, ~ph_off, ph_off), dim=1) - return torch.sum(rew.float(), dim=1) * (1 - self._switch()) + return torch.sum(rew.float(), dim=1) * (1 - self._switch) def _reward_reference_traj(self): """REWARDS EACH LEG INDIVIDUALLY BASED ON ITS POSITION IN THE CYCLE""" @@ -84,7 +96,7 @@ def _reward_reference_traj(self): error /= self.scales["dof_pos"] reward = (self._sqrdexp(error) - torch.abs(error) * 0.2).mean(dim=1) # * only when commanded velocity is higher - return reward * (1 - self._switch()) + return reward * (1 - self._switch) def _get_ref(self): leg_frame = torch.zeros_like(self.torques) @@ -112,10 +124,10 @@ def _reward_stand_still(self): rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) - return (rew_vel + rew_pos - rew_base_vel) * self._switch() + return (rew_vel + rew_pos - rew_base_vel) * self._switch def _reward_tracking_lin_vel(self): """Tracking linear velocity commands (xy axes)""" # just use lin_vel? 
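(Aside, not part of the patch: a self-contained sketch of the command "switch" that the osc/ref hunks above now cache as self._switch and that the humanoid/lander envs below reuse. It is close to 1 when the commanded-velocity norm sits inside a small deadband and decays smoothly to 0 for larger commands, so stand-still rewards are multiplied by switch and gait/tracking rewards by (1 - switch). The deadband is 0.1 m/s here and equals switch_scale in the humanoid/lander variants.)

import torch

def cmd_switch(commands, deadband=0.1, switch_scale=0.1):
    # ~1 for |cmd| <= deadband, smoothly -> 0 as |cmd| grows
    c_vel = torch.linalg.norm(commands, dim=1)
    return torch.exp(-torch.square(torch.clamp(c_vel - deadband, min=0.0)) / switch_scale)

cmds = torch.tensor([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
print(cmd_switch(cmds))  # ~[1.0, 3e-4]: standing env vs. fully commanded env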
reward = super()._reward_tracking_lin_vel() - return reward * (1 - self._switch()) + return reward * (1 - self._switch) diff --git a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py index 596b8d9a..5783737b 100644 --- a/gym/envs/mini_cheetah/mini_cheetah_ref_config.py +++ b/gym/envs/mini_cheetah/mini_cheetah_ref_config.py @@ -26,7 +26,7 @@ class control(MiniCheetahCfg.control): stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0} damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5} gait_freq = 3.0 - ctrl_frequency = 100 + ctrl_frequency = 500 desired_sim_frequency = 500 class commands(MiniCheetahCfg.commands): @@ -60,6 +60,7 @@ class reward_settings(MiniCheetahCfg.reward_settings): max_contact_force = 600.0 base_height_target = 0.3 tracking_sigma = 0.25 + switch_scale = 0.1 class scaling(MiniCheetahCfg.scaling): pass @@ -70,8 +71,10 @@ class MiniCheetahRefRunnerCfg(MiniCheetahRunnerCfg): runner_class_name = "OnPolicyRunner" class actor(MiniCheetahRunnerCfg.actor): + frequency = 100 hidden_dims = [256, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + layer_norm = [True, True, False] activation = "elu" smooth_exploration = False exploration_sample_freq = 16 @@ -83,7 +86,7 @@ class actor(MiniCheetahRunnerCfg.actor): "dof_vel", "phase_obs", ] - normalize_obs = True + normalize_obs = False actions = ["dof_pos_target"] disable_actions = False @@ -100,6 +103,9 @@ class noise: class critic(MiniCheetahRunnerCfg.critic): hidden_dims = [256, 256, 128] + layer_norm = [True, True, False] + dropouts = [0.1, 0.0, 0.0] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" obs = [ @@ -113,7 +119,7 @@ class critic(MiniCheetahRunnerCfg.critic): "phase_obs", "dof_pos_target", ] - normalize_obs = True + normalize_obs = False class reward: class weights: @@ -132,7 +138,7 @@ class weights: dof_pos_limits = 0.0 feet_contact_forces = 0.0 dof_near_home = 0.0 - reference_traj = 1.5 + reference_traj = 0.0 swing_grf = 1.5 stance_grf = 1.5 @@ -140,11 +146,28 @@ class termination_weight: termination = 0.15 class algorithm(MiniCheetahRunnerCfg.algorithm): - pass + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2 * 4096 # use all the data + max_gradient_steps = 50 + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [2e-5, 1e-2] + lr_ratio = 1.5 class runner(MiniCheetahRunnerCfg.runner): run_name = "" experiment_name = "mini_cheetah_ref" - max_iterations = 1000 # number of policy updates + max_iterations = 500 # number of policy updates algorithm_class_name = "PPO2" - num_steps_per_env = 32 # deprecate + num_steps_per_env = 20 # deprecate diff --git a/gym/envs/mit_humanoid/humanoid_running_config.py b/gym/envs/mit_humanoid/humanoid_running_config.py index 43a5bb4a..5ad7f3b5 100644 --- a/gym/envs/mit_humanoid/humanoid_running_config.py +++ b/gym/envs/mit_humanoid/humanoid_running_config.py @@ -142,7 +142,7 @@ class asset(LeggedRobotCfg.asset): # +'humanoid_fixed_arms_full.urdf') file = ( "{LEGGED_GYM_ROOT_DIR}/resources/robots/" - + "mit_humanoid/urdf/humanoid_F_sf.urdf" + + "mit_humanoid/urdf/humanoid_F_sf_learnt.urdf" ) keypoints = ["base"] end_effectors = ["left_foot", "right_foot"] diff --git a/gym/envs/mit_humanoid/lander.py b/gym/envs/mit_humanoid/lander.py new file mode 100644 index 00000000..86d37321 --- /dev/null +++ 
b/gym/envs/mit_humanoid/lander.py @@ -0,0 +1,127 @@ +import torch + +# from gym.envs.base.legged_robot import LeggedRobot +from gym.envs.mit_humanoid.mit_humanoid import MIT_Humanoid +from isaacgym.torch_utils import torch_rand_float + + +class Lander(MIT_Humanoid): + def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): + super().__init__(gym, sim, cfg, sim_params, sim_device, headless) + + def _resample_commands(self, env_ids): + """Randommly select commands of some environments + + Args: + env_ids (List[int]): Environments ids for which new commands are needed + """ + if len(env_ids) == 0: + return + super()._resample_commands(env_ids) + # * with 75% chance, reset to 0 + self.commands[env_ids, :] *= ( + torch_rand_float(0, 1, (len(env_ids), 1), device=self.device).squeeze(1) + < 0.25 + ).unsqueeze(1) + + def _check_terminations_and_timeouts(self): + """Check if environments need to be reset""" + contact_forces = self.contact_forces[:, self.termination_contact_indices, :] + self.terminated |= torch.any(torch.norm(contact_forces, dim=-1) > 1.0, dim=1) + self.timed_out = self.episode_length_buf >= self.max_episode_length + # self.to_be_reset = self.timed_out | self.terminated + + # --- rewards --- + + def _switch(self, mode=None): + c_vel = torch.linalg.norm(self.commands, dim=1) + switch = torch.exp( + -torch.square( + torch.max( + torch.zeros_like(c_vel), + c_vel - self.cfg.reward_settings.switch_scale, + ) + ) + / self.cfg.reward_settings.switch_scale + ) + if mode is None or mode == "stand": + return switch + elif mode == "move": + return 1 - switch + + def _reward_lin_vel_xy(self): + return torch.exp( + -torch.linalg.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) + ) + + def _reward_lin_vel_z(self): + # Penalize z axis base linear velocity w. squared exp + return self._sqrdexp(self.base_lin_vel[:, 2] / self.scales["base_lin_vel"]) + + def _reward_orientation(self): + # Penalize non flat base orientation + return torch.sum( + self._sqrdexp(torch.square(self.projected_gravity[:, :2])), dim=1 + ) + + def _reward_min_base_height(self): + """Squared exponential saturating at base_height target""" + error = self.base_height - self.cfg.reward_settings.base_height_target + error = torch.clamp(error, max=0, min=None).flatten() + return self._sqrdexp(error) + + def _reward_tracking_lin_vel(self): + """Tracking of linear velocity commands (xy axes)""" + # just use lin_vel? + error = self.commands[:, :2] - self.base_lin_vel[:, :2] + # * scale by (1+|cmd|): if cmd=0, no scaling. 
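(Aside, not part of the patch: a quick numeric check of the (1 + |cmd|) scaling applied on the next line. Dividing the tracking error by (1 + |cmd|) keeps the reward strict near zero command and progressively more forgiving at fast commands.)

import torch

cmd = torch.tensor([[0.0, 0.0], [2.0, 0.0]])
vel = torch.tensor([[0.2, 0.0], [2.2, 0.0]])
print((cmd - vel) / (1.0 + cmd.abs()))
# same 0.2 m/s raw error in both rows, but scaled down to ~0.067 at a 2 m/s command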
+ error *= 1.0 / (1.0 + torch.abs(self.commands[:, :2])) + return torch.mean(self._sqrdexp(error), dim=1) + + def _reward_dof_vel(self): + # Penalize dof velocities + return torch.mean( + self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1 + ) * self._switch("stand") + + def _reward_dof_near_home(self): + return self._sqrdexp( + (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos"] + ).mean(dim=1) + + def _reward_stand_still(self): + """Penalize motion at zero commands""" + # * normalize angles so we care about being within 5 deg + rew_pos = torch.mean( + self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1 + ) + rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) + rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) + rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) + return rew_vel + rew_pos - rew_base_vel * self._switch("stand") + + def _compute_grf(self, grf_norm=True): + grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) + if grf_norm: + return torch.clamp_max(grf / self.cfg.asset.total_mass, 1.0) + else: + return grf + + def smooth_sqr_wave(self, phase, sigma=0.2): # sigma=0 is step function + return phase.sin() / (2 * torch.sqrt(phase.sin() ** 2.0 + sigma**2.0)) + 0.5 + + def _reward_hips_forward(self): + # reward hip motors for pointing forward + hip_yaw_abad = torch.cat((self.dof_pos[:, 0:2], self.dof_pos[:, 5:7]), dim=1) + hip_yaw_abad -= torch.cat( + (self.default_dof_pos[:, 0:2], self.default_dof_pos[:, 5:7]), dim=1 + ) + hip_yaw_abad /= torch.cat( + (self.scales["dof_pos"][0:2], self.scales["dof_pos"][5:7]) + ) + return (hip_yaw_abad).pow(2).mean(dim=1) + # return self._sqrdexp(hip_yaw_abad).sum(dim=1).mean(dim=1) + + def _reward_power(self): + power = self.torques * self.dof_vel + return power.pow(2).mean(dim=1) diff --git a/gym/envs/mit_humanoid/lander_config.py b/gym/envs/mit_humanoid/lander_config.py new file mode 100644 index 00000000..86f3329f --- /dev/null +++ b/gym/envs/mit_humanoid/lander_config.py @@ -0,0 +1,328 @@ +from gym.envs.base.legged_robot_config import ( + LeggedRobotCfg, + LeggedRobotRunnerCfg, +) + +BASE_HEIGHT_REF = 0.80 + + +class LanderCfg(LeggedRobotCfg): + class env(LeggedRobotCfg.env): + num_envs = 4096 + num_actuators = 18 + episode_length_s = 10 # episode length in seconds + + sampled_history_length = 3 # n samples + sampled_history_frequency = 10 # [Hz] + + class terrain(LeggedRobotCfg.terrain): + pass + + class init_state(LeggedRobotCfg.init_state): + # * default setup chooses how the initial conditions are chosen. + # * "reset_to_basic" = a single position with added randomized noise. + # * "reset_to_range" = a range of joint positions and velocities. + # * "reset_to_traj" = feed in a trajectory to sample from. 
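(Aside, not part of the patch, and the helper below is hypothetical rather than the repo's implementation: "reset_to_range" means each environment reset draws its initial joint state uniformly from the per-joint ranges listed next.)

import torch

def sample_from_range(range_dict, device="cpu"):
    # one uniform sample per joint from {joint_name: [low, high]}
    lo = torch.tensor([r[0] for r in range_dict.values()], dtype=torch.float, device=device)
    hi = torch.tensor([r[1] for r in range_dict.values()], dtype=torch.float, device=device)
    return lo + (hi - lo) * torch.rand(len(range_dict), device=device)

# e.g. sample_from_range(LanderCfg.init_state.dof_vel_range) gives one random
# initial joint-velocity vector: within +/-0.1 rad/s for the legs, exactly 0 for the arms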
+ reset_mode = "reset_to_range" + + default_joint_angles = { + "hip_yaw": 0.0, + "hip_abad": 0.0, + "hip_pitch": -0.667751, + "knee": 1.4087, + "ankle": -0.708876, + "shoulder_pitch": 0.0, + "shoulder_abad": 0.0, + "shoulder_yaw": 0.0, + "elbow": -1.25, + } + + # * default COM for basic initialization + pos = [0.0, 0.0, 0.6] # x,y,z [m] + rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] + lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] + ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] + + # * initialization for random range setup + + dof_pos_range = { + "hip_yaw": [-0.0, 0.0], + "hip_abad": [-0.0, 0.0], + "hip_pitch": [-0.667751, -0.667751], + "knee": [1.4087, 1.4087], + "ankle": [-0.708876, -0.708876], + "shoulder_pitch": [0.0, 0.0], + "shoulder_abad": [0.0, 0.0], + "shoulder_yaw": [0.0, 0.0], + "elbow": [0, 0], + } + dof_vel_range = { + "hip_yaw": [-0.1, 0.1], + "hip_abad": [-0.1, 0.1], + "hip_pitch": [-0.1, 0.1], + "knee": [-0.1, 0.1], + "ankle": [-0.1, 0.1], + "shoulder_pitch": [0.0, 0.0], + "shoulder_abad": [0.0, 0.0], + "shoulder_yaw": [0.0, 0.0], + "elbow": [0.0, 0.0], + } + + root_pos_range = [ + [0.0, 0.0], # x + [0.0, 0.0], # y + [0.64, 1.5], # z + [-0.1, 0.1], # roll + [-0.1, 0.1], # pitch + [-0.1, 0.1], + ] # yaw + + root_vel_range = [ + [-0.75, 2.75], # x + [-0.55, 0.55], # y + [-2.5, 0.25], # z + [-0.35, 0.35], # roll + [-0.35, 0.35], # pitch + [-0.35, 0.35], # yaw + ] + + class control(LeggedRobotCfg.control): + # * PD Drive parameters: + stiffness = { + "hip_yaw": 30.0, + "hip_abad": 30.0, + "hip_pitch": 30.0, + "knee": 30.0, + "ankle": 30.0, + "shoulder_pitch": 40.0, + "shoulder_abad": 40.0, + "shoulder_yaw": 40.0, + "elbow": 50.0, + } # [N*m/rad] + damping = { + "hip_yaw": 2.0, + "hip_abad": 2.0, + "hip_pitch": 2.0, + "knee": 2.0, + "ankle": 2.0, + "shoulder_pitch": 2.0, + "shoulder_abad": 2.0, + "shoulder_yaw": 2.0, + "elbow": 1.0, + } # [N*m*s/rad] + + ctrl_frequency = 500 + desired_sim_frequency = 500 + + # class oscillator: + # base_frequency = 3.0 # [Hz] + + class commands: + resampling_time = 10.0 # time before command are changed[s] + + class ranges: + lin_vel_x = [-2.0, 2.0] # min max [m/s] [-0.75, 0.75] + lin_vel_y = 0.3 # max [m/s] + yaw_vel = 1.0 # max [rad/s] + + class push_robots: + toggle = True + interval_s = 2 + max_push_vel_xy = 0.5 + push_box_dims = [0.1, 0.1, 0.3] # x,y,z [m] + + class domain_rand: + randomize_friction = True + friction_range = [0.5, 1.25] + randomize_base_mass = True + added_mass_range = [-1.0, 1.0] + + class asset(LeggedRobotCfg.asset): + file = ( + "{LEGGED_GYM_ROOT_DIR}/resources/robots/" + + "mit_humanoid/urdf/humanoid_F_sf_learnt.urdf" + ) + # foot_collisionbox_names = ["foot"] + foot_name = "foot" + penalize_contacts_on = ["arm", "hand", "shoulder"] + terminate_after_contacts_on = ["base"] + flip_visual_attachments = False + self_collisions = 0 # 1 to disagble, 0 to enable...bitwise filter + collapse_fixed_joints = False + fix_base_link = False + disable_gravity = False + disable_motors = False + total_mass = 25.0 + + class reward_settings(LeggedRobotCfg.reward_settings): + soft_dof_pos_limit = 0.8 + soft_dof_vel_limit = 0.8 + soft_torque_limit = 0.8 + max_contact_force = 1500.0 + base_height_target = BASE_HEIGHT_REF + tracking_sigma = 0.25 + + # a smooth switch based on |cmd| (commanded velocity). 
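(Aside, not part of the patch: the stiffness/damping tables above parameterize the usual PD-plus-feed-forward law that _compute_torques and _apply_coupling evaluate later in this patch, e.g. for a single hip joint:)

import torch

kp, kd = torch.tensor([30.0]), torch.tensor([2.0])    # hip_* gains from the tables above
q, qd = torch.tensor([0.0]), torch.tensor([0.0])      # measured position / velocity
q_des, qd_des = torch.tensor([0.1]), torch.tensor([0.0])
tau_ff = torch.tensor([0.0])
tau = kp * (q_des - q) + kd * (qd_des - qd) + tau_ff  # -> 3.0 N*m for a 0.1 rad error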
+ switch_scale = 0.5 + switch_threshold = 0.2 + + class scaling(LeggedRobotCfg.scaling): + base_ang_vel = 2.5 + base_lin_vel = 1.5 + commands = 1 + base_height = BASE_HEIGHT_REF + dof_pos = [ + 0.1, + 0.2, + 0.8, + 0.8, + 0.8, + 0.1, + 0.2, + 0.8, + 0.8, + 0.8, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + ] + # # * Action scales + dof_pos_target = dof_pos + dof_vel = [ + 0.5, + 1.0, + 4.0, + 4.0, + 2.0, + 0.5, + 1.0, + 4.0, + 4.0, + 2.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + ] + dof_pos_history = 3 * dof_pos + + +class LanderRunnerCfg(LeggedRobotRunnerCfg): + seed = -1 + runner_class_name = "MyRunner" + + class actor(LeggedRobotRunnerCfg.actor): + frequency = 100 + init_noise_std = 1.0 + hidden_dims = [512, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = ["elu", "elu", "tanh"] + layer_norm = True + smooth_exploration = False + + obs = [ + "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_history", + "sampled_history_dof_pos", + "sampled_history_dof_vel", + "sampled_history_dof_pos_target", + ] + normalize_obs = False + + actions = ["dof_pos_target"] + disable_actions = False + + class noise: + dof_pos = 0.005 + dof_vel = 0.05 + base_ang_vel = 0.025 + base_lin_vel = 0.025 + projected_gravity = 0.01 + feet_contact_state = 0.025 + + class critic(LeggedRobotRunnerCfg.critic): + hidden_dims = [512, 256, 128] + # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid + activation = "elu" + layer_norm = True + + obs = [ + # "base_height", + "base_lin_vel", + "base_ang_vel", + "projected_gravity", + "commands", + "dof_pos_obs", + "dof_vel", + "dof_pos_history", + "sampled_history_dof_pos", + "sampled_history_dof_vel", + "sampled_history_dof_pos_target", + ] + normalize_obs = False + + class reward: + class weights: + torques = 5.0e-5 + power = 1e-6 # 1.0e-2 + min_base_height = 1.5 + lin_vel_xy = 1.0 + action_rate = 1e-2 + action_rate2 = 1e-3 + lin_vel_z = 0.0 + ang_vel_xy = 0.0 + dof_vel = 0.5 + dof_pos_limits = 0.25 + dof_near_home = 0.75 + hips_forward = 0.0 + collision = 1.0 + + class termination_weight: + termination = 1.0 + + class algorithm(LeggedRobotRunnerCfg.algorithm): + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 2**15 + max_gradient_steps = 24 + # new + storage_size = 2**17 # new + batch_size = 2**15 # new + + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [1e-5, 1e-2] + lr_ratio = 1.5 + + class runner(LeggedRobotRunnerCfg.runner): + policy_class_name = "ActorCritic" + algorithm_class_name = "PPO2" + num_steps_per_env = 24 + max_iterations = 500 + run_name = "lander" + experiment_name = "Humanoid" + save_interval = 50 diff --git a/gym/envs/mit_humanoid/mit_humanoid.py b/gym/envs/mit_humanoid/mit_humanoid.py index 74e35d5a..cb4db70f 100644 --- a/gym/envs/mit_humanoid/mit_humanoid.py +++ b/gym/envs/mit_humanoid/mit_humanoid.py @@ -1,7 +1,6 @@ import torch from gym.envs.base.legged_robot import LeggedRobot -from .jacobian import _apply_coupling class MIT_Humanoid(LeggedRobot): @@ -10,25 +9,148 @@ def __init__(self, gym, sim, cfg, sim_params, sim_device, headless): def _init_buffers(self): super()._init_buffers() + self._init_sampled_history_buffers() + def _init_sampled_history_buffers(self): + self.sampled_history_dof_pos_target = torch.zeros( + 
(self.num_envs, self.num_dof * self.cfg.env.sampled_history_length), + device=self.device, + ) + self.sampled_history_dof_pos = torch.zeros( + self.num_envs, + self.num_dof * self.cfg.env.sampled_history_length, + device=self.device, + ) + self.sampled_history_dof_vel = torch.zeros( + self.num_envs, + self.num_dof * self.cfg.env.sampled_history_length, + device=self.device, + ) + self.sampled_history_counter = torch.zeros( + self.num_envs, dtype=int, device=self.device + ) + self.sampled_history_threshold = int( + self.cfg.control.ctrl_frequency / self.cfg.env.sampled_history_frequency + ) + self.J = torch.eye(self.num_dof).to(self.device) + self.J[4, 3] = 1 + self.J[9, 8] = 1 + self.J_inv_T = torch.inverse(self.J.T) + + def _apply_coupling(self, q, qd, q_des, qd_des, kp, kd, tau_ff): + # Create a Jacobian matrix and move it to the same device as input tensors + + # Perform transformations using Jacobian + q = torch.matmul(q, self.J.T) + qd = torch.matmul(qd, self.J.T) + q_des = torch.matmul(q_des, self.J.T) + qd_des = torch.matmul(qd_des, self.J.T) + + # Compute feed-forward torques + tau_ff = torch.matmul(self.J_inv_T, tau_ff.T).T + + # Compute kp and kd terms + kp = torch.diagonal( + torch.matmul( + torch.matmul(self.J_inv_T, torch.diag_embed(kp, dim1=-2, dim2=-1)), + self.J_inv_T.T, + ), + dim1=-2, + dim2=-1, + ) + + kd = torch.diagonal( + torch.matmul( + torch.matmul(self.J_inv_T, torch.diag_embed(kd, dim1=-2, dim2=-1)), + self.J_inv_T.T, + ), + dim1=-2, + dim2=-1, + ) + + # Compute torques + torques = kp * (q_des - q) + kd * (qd_des - qd) + tau_ff + torques = torch.matmul(torques, self.J) + + return torques + + def _reset_system(self, env_ids): + if len(env_ids) == 0: + return + super()._reset_system(env_ids) + self._reset_sampled_history_buffers(env_ids) + return + + def _reset_sampled_history_buffers(self, ids): + n = self.cfg.env.sampled_history_length + self.sampled_history_dof_pos_target[ids] = self.dof_pos_target[ids].tile(n) + self.sampled_history_dof_pos[ids] = self.dof_pos[ids].tile(n) + self.sampled_history_dof_vel[ids] = self.dof_vel[ids].tile(n) + + # compute_torques accounting for coupling, and filtering torques def _compute_torques(self): - self.desired_pos_target = self.dof_pos_target + self.default_dof_pos - q = self.dof_pos.clone() - qd = self.dof_vel.clone() - q_des = self.desired_pos_target.clone() - qd_des = self.dof_vel_target.clone() - tau_ff = self.tau_ff.clone() - kp = self.p_gains.clone() - kd = self.d_gains.clone() - - if self.cfg.asset.apply_humanoid_jacobian: - torques = _apply_coupling(q, qd, q_des, qd_des, kp, kd, tau_ff) - else: - torques = kp * (q_des - q) + kd * (qd_des - qd) + tau_ff + torques = self._apply_coupling( + self.dof_pos, + self.dof_vel, + self.dof_pos_target + self.default_dof_pos, + self.dof_vel_target, + self.p_gains, + self.d_gains, + self.tau_ff, + ) + return torques.clip(-self.torque_limits, self.torque_limits) + + def _post_decimation_step(self): + super()._post_decimation_step() + self._update_sampled_history_buffers() + + def _update_sampled_history_buffers(self): + self.sampled_history_counter += 1 + + ids = torch.nonzero( + self.sampled_history_counter == self.sampled_history_threshold, + as_tuple=False, + ).flatten() + + self.sampled_history_dof_pos_target[ids] = torch.roll( + self.sampled_history_dof_pos_target[ids], self.num_dof, dims=1 + ) # check + self.sampled_history_dof_pos_target[ids, : self.num_dof] = self.dof_pos_target[ + ids + ] + self.sampled_history_dof_pos[ids] = torch.roll( + self.sampled_history_dof_pos[ids], 
self.num_dof, dims=1 + ) # check + self.sampled_history_dof_pos[ids, : self.num_dof] = self.dof_pos[ids] + self.sampled_history_dof_vel[ids] = torch.roll( + self.sampled_history_dof_vel[ids], self.num_dof, dims=1 + ) # check + self.sampled_history_dof_vel[ids, : self.num_dof] = self.dof_vel[ids] - torques = torch.clip(torques, -self.torque_limits, self.torque_limits) + self.sampled_history_counter[ids] = 0 - return torques.view(self.torques.shape) + # --- rewards --- + + def _switch(self, mode=None): + c_vel = torch.linalg.norm(self.commands, dim=1) + switch = torch.exp( + -torch.square( + torch.max( + torch.zeros_like(c_vel), + c_vel - self.cfg.reward_settings.switch_scale, + ) + ) + / self.cfg.reward_settings.switch_scale + ) + if mode is None or mode == "stand": + return switch + elif mode == "move": + return 1 - switch + + def _reward_lin_vel_xy(self): + return torch.exp( + -torch.linalg.norm(self.commands[:, :2] - self.base_lin_vel[:, :2], dim=1) + ) def _reward_lin_vel_z(self): # Penalize z axis base linear velocity w. squared exp @@ -56,12 +178,44 @@ def _reward_tracking_lin_vel(self): def _reward_dof_vel(self): # Penalize dof velocities - return torch.mean(self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1) + return torch.mean( + self._sqrdexp(self.dof_vel / self.scales["dof_vel"]), dim=1 + ) * self._switch("stand") def _reward_dof_near_home(self): - return torch.mean( - self._sqrdexp( - (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos_obs"] - ), - dim=1, + return self._sqrdexp( + (self.dof_pos - self.default_dof_pos) / self.scales["dof_pos"] + ).mean(dim=1) + + def _reward_stand_still(self): + """Penalize motion at zero commands""" + # * normalize angles so we care about being within 5 deg + rew_pos = torch.mean( + self._sqrdexp((self.dof_pos - self.default_dof_pos) / torch.pi * 36), dim=1 + ) + rew_vel = torch.mean(self._sqrdexp(self.dof_vel), dim=1) + rew_base_vel = torch.mean(torch.square(self.base_lin_vel), dim=1) + rew_base_vel += torch.mean(torch.square(self.base_ang_vel), dim=1) + return rew_vel + rew_pos - rew_base_vel * self._switch("stand") + + def _compute_grf(self, grf_norm=True): + grf = torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) + if grf_norm: + return torch.clamp_max(grf / self.cfg.asset.total_mass, 1.0) + else: + return grf + + def smooth_sqr_wave(self, phase, sigma=0.2): # sigma=0 is step function + return phase.sin() / (2 * torch.sqrt(phase.sin() ** 2.0 + sigma**2.0)) + 0.5 + + def _reward_hips_forward(self): + # reward hip motors for pointing forward + hip_yaw_abad = torch.cat((self.dof_pos[:, 0:2], self.dof_pos[:, 5:7]), dim=1) + hip_yaw_abad -= torch.cat( + (self.default_dof_pos[:, 0:2], self.default_dof_pos[:, 5:7]), dim=1 + ) + hip_yaw_abad /= torch.cat( + (self.scales["dof_pos"][0:2], self.scales["dof_pos"][5:7]) ) + return (hip_yaw_abad).pow(2).mean(dim=1) + # return self._sqrdexp(hip_yaw_abad).sum(dim=1).mean(dim=1) diff --git a/gym/envs/mit_humanoid/mit_humanoid_config.py b/gym/envs/mit_humanoid/mit_humanoid_config.py index c283b792..861f2826 100644 --- a/gym/envs/mit_humanoid/mit_humanoid_config.py +++ b/gym/envs/mit_humanoid/mit_humanoid_config.py @@ -3,14 +3,17 @@ LeggedRobotRunnerCfg, ) +BASE_HEIGHT_REF = 0.80 + class MITHumanoidCfg(LeggedRobotCfg): class env(LeggedRobotCfg.env): num_envs = 4096 - num_observations = 49 + 3 * 18 # 121 num_actuators = 18 - episode_length_s = 100 # episode length in seconds - num_privileged_obs = num_observations + episode_length_s = 5 # episode length in seconds + + 
sampled_history_length = 3 # n samples + sampled_history_frequency = 10 # [Hz] class terrain(LeggedRobotCfg.terrain): pass @@ -25,17 +28,17 @@ class init_state(LeggedRobotCfg.init_state): default_joint_angles = { "hip_yaw": 0.0, "hip_abad": 0.0, - "hip_pitch": -0.4, - "knee": 0.9, - "ankle": -0.45, + "hip_pitch": -0.667751, + "knee": 1.4087, + "ankle": -0.708876, "shoulder_pitch": 0.0, "shoulder_abad": 0.0, "shoulder_yaw": 0.0, - "elbow": 0.0, + "elbow": -1.25, } # * default COM for basic initialization - pos = [0.0, 0.0, 0.66] # x,y,z [m] + pos = [0.0, 0.0, 0.6] # x,y,z [m] rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s] ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s] @@ -43,22 +46,22 @@ class init_state(LeggedRobotCfg.init_state): # * initialization for random range setup dof_pos_range = { - "hip_yaw": [0.0, 0.0], - "hip_abad": [0.0, 0.0], - "hip_pitch": [-0.29, -0.25], - "knee": [0.67, 0.71], - "ankle": [-0.43, -0.39], + "hip_yaw": [-0.0, 0.0], + "hip_abad": [-0.0, 0.0], + "hip_pitch": [-0.667751, -0.667751], + "knee": [1.4087, 1.4087], + "ankle": [-0.708876, -0.708876], "shoulder_pitch": [0.0, 0.0], "shoulder_abad": [0.0, 0.0], "shoulder_yaw": [0.0, 0.0], - "elbow": [0.0, 0.0], + "elbow": [0, 0], } dof_vel_range = { - "hip_yaw": [-0.0, 0.1], - "hip_abad": [-0.0, 0.1], - "hip_pitch": [-0.1, -0.1], - "knee": [-0.05, 0.05], - "ankle": [-0.05, 0.05], + "hip_yaw": [-0.1, 0.1], + "hip_abad": [-0.1, 0.1], + "hip_pitch": [-0.1, 0.1], + "knee": [-0.1, 0.1], + "ankle": [-0.1, 0.1], "shoulder_pitch": [0.0, 0.0], "shoulder_abad": [0.0, 0.0], "shoulder_yaw": [0.0, 0.0], @@ -68,20 +71,20 @@ class init_state(LeggedRobotCfg.init_state): root_pos_range = [ [0.0, 0.0], # x [0.0, 0.0], # y - [0.7, 0.72], # z + [0.64, 0.7], # z [-0.1, 0.1], # roll [-0.1, 0.1], # pitch [-0.1, 0.1], ] # yaw root_vel_range = [ - [-0.1, 0.1], # x - [-0.1, 0.1], # y - [-0.1, 0.1], # z - [-0.1, 0.1], # roll - [-0.1, 0.1], # pitch - [-0.1, 0.1], - ] # yaw + [-0.75, 2.75], # x + [-0.55, 0.55], # y + [-0.35, 0.1], # z + [-0.35, 0.35], # roll + [-0.35, 0.35], # pitch + [-0.35, 0.35], # yaw + ] class control(LeggedRobotCfg.control): # * PD Drive parameters: @@ -94,39 +97,44 @@ class control(LeggedRobotCfg.control): "shoulder_pitch": 40.0, "shoulder_abad": 40.0, "shoulder_yaw": 40.0, - "elbow": 40.0, + "elbow": 50.0, } # [N*m/rad] damping = { - "hip_yaw": 5.0, - "hip_abad": 5.0, - "hip_pitch": 5.0, - "knee": 5.0, - "ankle": 5.0, - "shoulder_pitch": 5.0, - "shoulder_abad": 5.0, - "shoulder_yaw": 5.0, - "elbow": 5.0, + "hip_yaw": 2.0, + "hip_abad": 2.0, + "hip_pitch": 2.0, + "knee": 2.0, + "ankle": 2.0, + "shoulder_pitch": 2.0, + "shoulder_abad": 2.0, + "shoulder_yaw": 2.0, + "elbow": 1.0, } # [N*m*s/rad] ctrl_frequency = 100 - desired_sim_frequency = 800 + desired_sim_frequency = 500 + + filter_gain = 0.1586 # 1: no filtering, 0: wall + + class oscillator: + base_frequency = 3.0 # [Hz] class commands: resampling_time = 10.0 # time before command are changed[s] class ranges: - lin_vel_x = [-1.0, 1.0] # min max [m/s] - lin_vel_y = 1.0 # max [m/s] - yaw_vel = 1 # max [rad/s] + lin_vel_x = [-0.0, 4.0] # min max [m/s] [-0.75, 0.75] + lin_vel_y = 0.0 # max [m/s] + yaw_vel = 0.0 # max [rad/s] class push_robots: - toggle = False - interval_s = 15 - max_push_vel_xy = 0.05 - push_box_dims = [0.1, 0.2, 0.3] # x,y,z [m] + toggle = True + interval_s = 1 + max_push_vel_xy = 0.5 + push_box_dims = [0.1, 0.1, 0.3] # x,y,z [m] class domain_rand: - randomize_friction = False + randomize_friction = True friction_range = 
[0.5, 1.25] randomize_base_mass = True added_mass_range = [-1.0, 1.0] @@ -134,47 +142,84 @@ class domain_rand: class asset(LeggedRobotCfg.asset): file = ( "{LEGGED_GYM_ROOT_DIR}/resources/robots/" - + "mit_humanoid/urdf/humanoid_R_sf.urdf" + + "mit_humanoid/urdf/humanoid_F_sf_learnt.urdf" ) + # foot_collisionbox_names = ["foot"] foot_name = "foot" - penalize_contacts_on = ["base", "arm"] + penalize_contacts_on = ["arm"] terminate_after_contacts_on = ["base"] - end_effector_names = ["hand", "foot"] + end_effector_names = ["hand", "foot"] # ?? flip_visual_attachments = False - self_collisions = 1 # 1 to disagble, 0 to enable...bitwise filter + self_collisions = 0 # 1 to disagble, 0 to enable...bitwise filter collapse_fixed_joints = False # * see GymDofDriveModeFlags # * (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort) default_dof_drive_mode = 3 + fix_base_link = False disable_gravity = False disable_motors = False - apply_humanoid_jacobian = False + total_mass = 25.0 class reward_settings(LeggedRobotCfg.reward_settings): - soft_dof_pos_limit = 0.9 - soft_dof_vel_limit = 0.9 - soft_torque_limit = 0.9 + soft_dof_pos_limit = 0.8 + soft_dof_vel_limit = 0.8 + soft_torque_limit = 0.8 max_contact_force = 1500.0 + base_height_target = BASE_HEIGHT_REF + tracking_sigma = 0.25 - base_height_target = 0.65 - tracking_sigma = 0.5 + # a smooth switch based on |cmd| (commanded velocity). + switch_scale = 0.5 + switch_threshold = 0.2 class scaling(LeggedRobotCfg.scaling): - # * dimensionless time: sqrt(L/g) or sqrt(I/[mgL]), with I=I0+mL^2 - virtual_leg_length = 0.65 - dimensionless_time = (virtual_leg_length / 9.81) ** 0.5 - base_height = virtual_leg_length - base_lin_vel = virtual_leg_length / dimensionless_time - base_ang_vel = 3.14 / dimensionless_time - dof_vel = 20 # ought to be roughly max expected speed. 
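(Aside, not part of the patch: a 2-DOF numeric sketch of MIT_Humanoid._apply_coupling added earlier in this patch. The real J is the 18x18 identity except J[4, 3] = J[9, 8] = 1, i.e. each ankle coordinate also picks up its knee; states, gains, and feed-forward torques are mapped into the coupled coordinates z = J q, the PD law is evaluated there, and the result is mapped back to joint torques through J^T.)

import torch

J = torch.tensor([[1.0, 0.0], [1.0, 1.0]])  # toy knee/ankle coupling
J_inv_T = torch.inverse(J.T)

def apply_coupling(q, qd, q_des, qd_des, kp, kd, tau_ff):
    q, q_des = q @ J.T, q_des @ J.T
    qd, qd_des = qd @ J.T, qd_des @ J.T
    tau_ff = (J_inv_T @ tau_ff.T).T
    kp = torch.diagonal(J_inv_T @ torch.diag_embed(kp) @ J_inv_T.T, dim1=-2, dim2=-1)
    kd = torch.diagonal(J_inv_T @ torch.diag_embed(kd) @ J_inv_T.T, dim1=-2, dim2=-1)
    return (kp * (q_des - q) + kd * (qd_des - qd) + tau_ff) @ J

q = qd = qd_des = tau_ff = torch.zeros(1, 2)
q_des = torch.tensor([[0.1, 0.0]])           # 0.1 rad error on the "knee"
kp, kd = torch.full((1, 2), 30.0), torch.full((1, 2), 2.0)
print(apply_coupling(q, qd, q_des, qd_des, kp, kd, tau_ff))  # tensor([[9., 3.]])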
- height_measurements = virtual_leg_length - - # todo check order of joints, create per-joint scaling - dof_pos = 3.14 - dof_pos_obs = dof_pos - # * Action scales + base_ang_vel = 2.5 + base_lin_vel = 1.5 + commands = 1 + base_height = BASE_HEIGHT_REF + dof_pos = [ + 0.1, + 0.2, + 0.8, + 0.8, + 0.8, + 0.1, + 0.2, + 0.8, + 0.8, + 0.8, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + ] + # # * Action scales dof_pos_target = dof_pos - tau_ff = 0.1 + dof_vel = [ + 0.5, + 1.0, + 4.0, + 4.0, + 2.0, + 0.5, + 1.0, + 4.0, + 4.0, + 2.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + ] + dof_pos_history = 3 * dof_pos class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): @@ -182,11 +227,12 @@ class MITHumanoidRunnerCfg(LeggedRobotRunnerCfg): runner_class_name = "OnPolicyRunner" class actor(LeggedRobotRunnerCfg.actor): + frequency = 100 init_noise_std = 1.0 hidden_dims = [512, 256, 128] - critic_hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" + layer_norm = [True, True, False] smooth_exploration = False obs = [ @@ -197,60 +243,82 @@ class actor(LeggedRobotRunnerCfg.actor): "commands", "dof_pos_obs", "dof_vel", - "dof_pos_history", + # "dof_pos_history", + "sampled_history_dof_pos", + "sampled_history_dof_vel", + "sampled_history_dof_pos_target", ] - normalize_obs = True + normalize_obs = False actions = ["dof_pos_target"] disable_actions = False class noise: - base_height = 0.05 - dof_pos_obs = 0.0 - dof_vel = 0.0 - base_lin_vel = 0.1 - base_ang_vel = 0.2 - projected_gravity = 0.05 - height_measurements = 0.1 + dof_pos = 0.005 + dof_vel = 0.05 + base_ang_vel = 0.025 + base_lin_vel = 0.025 + projected_gravity = 0.01 + feet_contact_state = 0.025 class critic(LeggedRobotRunnerCfg.critic): hidden_dims = [512, 256, 128] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "elu" + layer_norm = [True, True, False] obs = [ "base_height", "base_lin_vel", "base_ang_vel", "projected_gravity", - "commands", "dof_pos_obs", "dof_vel", - "dof_pos_history", + "sampled_history_dof_pos", + "sampled_history_dof_vel", + "sampled_history_dof_pos_target", ] - normalize_obs = True + normalize_obs = False class reward: class weights: + tracking_lin_vel = 4.0 tracking_ang_vel = 0.5 - tracking_lin_vel = 0.5 - orientation = 1.5 - torques = 5.0e-6 + # orientation = 1.0 + torques = 5.0e-4 min_base_height = 1.5 - action_rate = 0.01 - action_rate2 = 0.001 + action_rate = 1e-3 + action_rate2 = 1e-3 lin_vel_z = 0.0 ang_vel_xy = 0.0 - dof_vel = 0.0 - stand_still = 0.0 - dof_pos_limits = 0.0 - dof_near_home = 0.5 + # dof_vel = 0.25 + # stand_still = 0.25 + dof_pos_limits = 0.25 + dof_near_home = 0.25 + hips_forward = 0.0 + walk_freq = 0.0 # 2.5 class termination_weight: termination = 15 class algorithm(LeggedRobotRunnerCfg.algorithm): - pass + # both + gamma = 0.99 + lam = 0.95 + # shared + batch_size = 4096 + max_gradient_steps = 48 + clip_param = 0.2 + learning_rate = 1.0e-3 + max_grad_norm = 1.0 + # Critic + use_clipped_value_loss = True + # Actor + entropy_coef = 0.01 + schedule = "adaptive" # could be adaptive, fixed + desired_kl = 0.01 + lr_range = [1e-5, 1e-2] + lr_ratio = 1.5 class runner(LeggedRobotRunnerCfg.runner): policy_class_name = "ActorCritic" diff --git a/gym/envs/pendulum/pendulum_SAC_config.py b/gym/envs/pendulum/pendulum_SAC_config.py index 0125cee4..42bf0265 100644 --- a/gym/envs/pendulum/pendulum_SAC_config.py +++ b/gym/envs/pendulum/pendulum_SAC_config.py @@ -17,7 +17,7 @@ class init_state(PendulumCfg.init_state): 
dof_vel_range = {"theta": [-5, 5]} class control(PendulumCfg.control): - ctrl_frequency = 10 + ctrl_frequency = 100 desired_sim_frequency = 100 class asset(PendulumCfg.asset): @@ -37,16 +37,14 @@ class PendulumSACRunnerCfg(FixedRobotCfgPPO): runner_class_name = "OffPolicyRunner" class actor(FixedRobotCfgPPO.actor): + frequency = 10 latent_nn = {"hidden_dims": [128, 64], "activation": "elu", "layer_norm": True} mean_nn = {"hidden_dims": [32], "activation": "elu", "layer_norm": True} std_nn = {"hidden_dims": [32], "activation": "elu", "layer_norm": True} nn_params = {"latent": latent_nn, "mean": mean_nn, "std": std_nn} normalize_obs = False - obs = [ - "dof_pos_obs", - "dof_vel", - ] + obs = ["dof_pos_obs", "dof_vel"] actions = ["tau_ff"] disable_actions = False diff --git a/gym/envs/pendulum/pendulum_config.py b/gym/envs/pendulum/pendulum_config.py index bb33c1f1..910fe341 100644 --- a/gym/envs/pendulum/pendulum_config.py +++ b/gym/envs/pendulum/pendulum_config.py @@ -62,6 +62,7 @@ class PendulumRunnerCfg(FixedRobotCfgPPO): runner_class_name = "OnPolicyRunner" class actor(FixedRobotCfgPPO.actor): + frequency = 25 hidden_dims = [128, 64, 32] # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid activation = "tanh" diff --git a/gym/utils/helpers.py b/gym/utils/helpers.py index 25391ffb..541482fe 100644 --- a/gym/utils/helpers.py +++ b/gym/utils/helpers.py @@ -191,7 +191,7 @@ def get_args(custom_parameters=None): { "name": "--task", "type": str, - "default": "pendulum", + "default": "lander", "help": "Resume training or start testing from a checkpoint. " "Overrides config file if provided.", }, diff --git a/gym/utils/task_registry.py b/gym/utils/task_registry.py index 477049f3..bc8d49d5 100644 --- a/gym/utils/task_registry.py +++ b/gym/utils/task_registry.py @@ -178,6 +178,9 @@ def set_control_and_sim_dt(self, env_cfg, train_cfg): f" to {env_cfg.sim_dt}." 
) + if not hasattr(train_cfg.actor, "frequency"): + train_cfg.actor.frequency = env_cfg.control.ctrl_frequency + def set_discount_rates(self, train_cfg, dt): if hasattr(train_cfg.algorithm, "discount_horizon"): hrzn = train_cfg.algorithm.discount_horizon diff --git a/learning/modules/QRCritics.py b/learning/modules/QRCritics.py index d2ed9378..9dd51489 100644 --- a/learning/modules/QRCritics.py +++ b/learning/modules/QRCritics.py @@ -19,7 +19,7 @@ def init_weights(m): m.bias.data.fill_(0.01) -class Critic(nn.Module): +class Critic2(nn.Module): def __init__( self, num_obs, diff --git a/learning/runners/BaseRunner.py b/learning/runners/BaseRunner.py index 01a1b825..1545293b 100644 --- a/learning/runners/BaseRunner.py +++ b/learning/runners/BaseRunner.py @@ -8,8 +8,8 @@ class BaseRunner: def __init__(self, env, train_cfg, device="cpu"): self.device = device self.env = env + self.setup_reward_functions() self.parse_train_cfg(train_cfg) - self.num_steps_per_env = self.cfg["num_steps_per_env"] self.save_interval = self.cfg["save_interval"] self.num_learning_iterations = self.cfg["max_iterations"] @@ -18,6 +18,13 @@ def __init__(self, env, train_cfg, device="cpu"): self.log_dir = train_cfg["log_dir"] self._set_up_alg() + def setup_reward_functions(self): + self.reward_functions = { + method.replace("_reward_", ""): getattr(self.env, method) + for method in dir(self.env) + if callable(getattr(self.env, method)) and method.startswith("_reward_") + } + def _set_up_alg(self): num_actor_obs = self.get_obs_size(self.actor_cfg["obs"]) num_actions = self.get_action_size(self.actor_cfg["actions"]) @@ -95,8 +102,8 @@ def get_rewards(self, reward_weights, modifier=1, mask=None): if mask is None: mask = 1.0 for name, weight in reward_weights.items(): - rewards_dict[name] = mask * self._get_reward({name: weight}, modifier) + rewards_dict[name] = mask * self._get_reward(name, weight * modifier) return rewards_dict - def _get_reward(self, name_weight, modifier=1): - return modifier * self.env.compute_reward(name_weight).to(self.device) + def _get_reward(self, name, weight): + return weight * self.reward_functions[name]().to(self.device) diff --git a/learning/runners/my_runner.py b/learning/runners/my_runner.py index 17e0817b..e0da1a1b 100644 --- a/learning/runners/my_runner.py +++ b/learning/runners/my_runner.py @@ -16,12 +16,6 @@ class MyRunner(OnPolicyRunner): def __init__(self, env, train_cfg, device="cpu"): super().__init__(env, train_cfg, device) - logger.initialize( - self.env.num_envs, - self.env.dt, - self.cfg["max_iterations"], - self.device, - ) def _set_up_alg(self): num_actor_obs = self.get_obs_size(self.actor_cfg["obs"]) @@ -33,10 +27,12 @@ def _set_up_alg(self): alg_class = eval(self.cfg["algorithm_class_name"]) self.alg = alg_class(actor, critic, device=self.device, **self.alg_cfg) - def learn(self): - self.set_up_logger() + def learn(self, states_to_log_dict=None): + n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) + assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" + self.set_up_logger(dt=self.env.dt * n_policy_steps) - rewards_dict = {} + rewards_dict = self.initialize_rewards_dict(n_policy_steps) self.alg.switch_to_train() actor_obs = self.get_obs(self.actor_cfg["obs"]) @@ -73,6 +69,13 @@ def learn(self): for self.it in range(self.it + 1, tot_iter + 1): logger.tic("iteration") logger.tic("collection") + + # * Simulate environment and log states + if states_to_log_dict is not None: + it_idx = self.it - 1 + if it_idx % 10 == 0: + 
self.sim_and_log_states(states_to_log_dict, it_idx) + # * Rollout with torch.inference_mode(): for i in range(self.num_steps_per_env): @@ -90,36 +93,38 @@ def learn(self): "critic_obs": critic_obs, } ) + for step in range(n_policy_steps): + self.env.step() + # put reward integration here + self.update_rewards_dict(rewards_dict, step) + else: + # catch and reset failed envs + to_be_reset = self.env.timed_out | self.env.terminated + env_ids = (to_be_reset).nonzero(as_tuple=False).flatten() + self.env._reset_idx(env_ids) - self.env.step() - + total_rewards = torch.stack( + tuple(rewards_dict.sum(dim=0).values()) + ).sum(dim=(0)) actor_obs = self.get_noisy_obs( self.actor_cfg["obs"], self.actor_cfg["noise"] ) critic_obs = self.get_obs(self.critic_cfg["obs"]) - # * get time_outs - timed_out = self.get_timed_out() - terminated = self.get_terminated() - dones = timed_out | terminated - - self.update_rewards(rewards_dict, terminated) - total_rewards = torch.stack(tuple(rewards_dict.values())).sum(dim=0) transition.update( { "next_actor_obs": actor_obs, "next_critic_obs": critic_obs, "rewards": total_rewards, - "timed_out": timed_out, - "terminated": terminated, - "dones": dones, + "timed_out": self.env.timed_out, + "dones": self.env.timed_out | self.env.terminated, } ) storage.add_transitions(transition) - logger.log_rewards(rewards_dict) + logger.log_rewards(rewards_dict.sum(dim=0)) logger.log_rewards({"total_rewards": total_rewards}) - logger.finish_step(dones) + logger.finish_step(self.env.timed_out | self.env.terminated) logger.toc("collection") logger.tic("learning") @@ -137,7 +142,28 @@ def learn(self): self.save() self.save() - def set_up_logger(self): + @torch.no_grad + def burn_in_normalization(self, n_iterations=100): + actor_obs = self.get_obs(self.actor_cfg["obs"]) + critic_obs = self.get_obs(self.critic_cfg["obs"]) + for _ in range(n_iterations): + actions = self.alg.act(actor_obs) + self.set_actions(self.actor_cfg["actions"], actions) + self.env.step() + actor_obs = self.get_noisy_obs( + self.actor_cfg["obs"], self.actor_cfg["noise"] + ) + critic_obs = self.get_obs(self.critic_cfg["obs"]) + self.alg.critic.evaluate(critic_obs) + self.env.reset() + + def set_up_logger(self, dt=None): + if dt is None: + dt = self.env.dt + logger.initialize( + self.env.num_envs, dt, self.cfg["max_iterations"], self.device + ) + logger.register_rewards(list(self.critic_cfg["reward"]["weights"].keys())) logger.register_rewards( list(self.critic_cfg["reward"]["termination_weight"].keys()) @@ -151,15 +177,83 @@ def set_up_logger(self): logger.register_category("actor", self.alg.actor, ["action_std", "entropy"]) logger.attach_torch_obj_to_wandb((self.alg.actor, self.alg.critic)) - @torch.no_grad - def burn_in_normalization(self, n_iterations=100): - actor_obs = self.get_obs(self.actor_cfg["obs"]) - for _ in range(n_iterations): - actions = self.alg.act(actor_obs) - self.set_actions(self.actor_cfg["actions"], actions) - self.env.step() - actor_obs = self.get_noisy_obs( - self.actor_cfg["obs"], self.actor_cfg["noise"] + def update_rewards(self, rewards_dict, terminated): + # sum existing rewards with new rewards + + rewards_dict.update( + self.get_rewards( + self.critic_cfg["reward"]["termination_weight"], mask=terminated ) - print(f"Value offset: {self.alg.critic.value_offset.item()}") - self.env.reset() + ) + rewards_dict.update( + self.get_rewards( + self.critic_cfg["reward"]["weights"], + modifier=self.env.dt, + mask=~terminated, + ) + ) + + def initialize_rewards_dict(self, n_steps): + # sum existing 
rewards with new rewards + rewards_dict = TensorDict( + {}, batch_size=(n_steps, self.env.num_envs), device=self.device + ) + for key in self.critic_cfg["reward"]["termination_weight"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + for key in self.critic_cfg["reward"]["weights"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + return rewards_dict + + def update_rewards_dict(self, rewards_dict, step): + # sum existing rewards with new rewards + rewards_dict[step].update( + self.get_rewards( + self.critic_cfg["reward"]["termination_weight"], + modifier=self.env.dt, + mask=self.env.terminated, + ), + inplace=True, + ) + rewards_dict[step].update( + self.get_rewards( + self.critic_cfg["reward"]["weights"], + modifier=self.env.dt, + mask=~self.env.terminated, + ), + inplace=True, + ) + + def sim_and_log_states(self, states_to_log_dict, it_idx): + # Simulate environment for as many steps as expected in the dict. + # Log states to the dict, as well as whether the env terminated. + steps = states_to_log_dict["terminated"].shape[2] + actor_obs = self.get_obs(self.policy_cfg["actor_obs"]) + + with torch.inference_mode(): + for i in range(steps): + actions = self.alg.act(actor_obs) + self.set_actions( + self.policy_cfg["actions"], + actions, + self.policy_cfg["disable_actions"], + ) + + self.env.step() + + actor_obs = self.get_noisy_obs( + self.policy_cfg["actor_obs"], self.policy_cfg["noise"] + ) + + # Log states (just for the first env) + terminated = self.get_terminated()[0] + for state in states_to_log_dict: + if state == "terminated": + states_to_log_dict[state][0, it_idx, i, :] = terminated + else: + states_to_log_dict[state][0, it_idx, i, :] = getattr( + self.env, state + )[0, :] diff --git a/learning/runners/off_policy_runner.py b/learning/runners/off_policy_runner.py index 7aed58e7..83249aef 100644 --- a/learning/runners/off_policy_runner.py +++ b/learning/runners/off_policy_runner.py @@ -16,12 +16,6 @@ class OffPolicyRunner(BaseRunner): def __init__(self, env, train_cfg, device="cpu"): super().__init__(env, train_cfg, device) - logger.initialize( - self.env.num_envs, - self.env.dt, - self.cfg["max_iterations"], - self.device, - ) def _set_up_alg(self): num_actor_obs = self.get_obs_size(self.actor_cfg["obs"]) @@ -46,9 +40,11 @@ def _set_up_alg(self): ) def learn(self): - self.set_up_logger() + n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) + assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" + self.set_up_logger(dt=self.env.dt * n_policy_steps) - rewards_dict = {} + rewards_dict = self.initialize_rewards_dict(n_policy_steps) self.alg.switch_to_train() actor_obs = self.get_obs(self.actor_cfg["obs"]) @@ -94,28 +90,32 @@ def learn(self): } ) - self.env.step() + for step in range(n_policy_steps): + self.env.step() + # put reward integration here + self.update_rewards_dict(rewards_dict, step) + else: + # catch and reset failed envs + to_be_reset = self.env.timed_out | self.env.terminated + env_ids = (to_be_reset).nonzero(as_tuple=False).flatten() + self.env._reset_idx(env_ids) + + total_rewards = torch.stack( + tuple(rewards_dict.sum(dim=0).values()) + ).sum(dim=(0)) actor_obs = self.get_noisy_obs( self.actor_cfg["obs"], self.actor_cfg["noise"] ) critic_obs = self.get_obs(self.critic_cfg["obs"]) - # * get time_outs - timed_out = self.get_timed_out() - terminated = self.get_terminated() - dones = timed_out | terminated - - 
self.update_rewards(rewards_dict, terminated) - total_rewards = torch.stack(tuple(rewards_dict.values())).sum(dim=0) - transition.update( { "next_actor_obs": actor_obs, "next_critic_obs": critic_obs, "rewards": total_rewards, - "timed_out": timed_out, - "dones": dones, + "timed_out": self.env.timed_out, + "dones": self.env.timed_out | self.env.terminated, } ) storage.add_transitions(transition) @@ -147,35 +147,39 @@ def learn(self): } ) - self.env.step() + for step in range(n_policy_steps): + self.env.step() + # put reward integration here + self.update_rewards_dict(rewards_dict, step) + else: + # catch and reset failed envs + to_be_reset = self.env.timed_out | self.env.terminated + env_ids = (to_be_reset).nonzero(as_tuple=False).flatten() + self.env._reset_idx(env_ids) + + total_rewards = torch.stack( + tuple(rewards_dict.sum(dim=0).values()) + ).sum(dim=(0)) actor_obs = self.get_noisy_obs( self.actor_cfg["obs"], self.actor_cfg["noise"] ) critic_obs = self.get_obs(self.critic_cfg["obs"]) - # * get time_outs - timed_out = self.get_timed_out() - terminated = self.get_terminated() - dones = timed_out | terminated - - self.update_rewards(rewards_dict, terminated) - total_rewards = torch.stack(tuple(rewards_dict.values())).sum(dim=0) - transition.update( { "next_actor_obs": actor_obs, "next_critic_obs": critic_obs, "rewards": total_rewards, - "timed_out": timed_out, - "dones": dones, + "timed_out": self.env.timed_out, + "dones": self.env.timed_out | self.env.terminated, } ) storage.add_transitions(transition) - logger.log_rewards(rewards_dict) + logger.log_rewards(rewards_dict.sum(dim=0)) logger.log_rewards({"total_rewards": total_rewards}) - logger.finish_step(dones) + logger.finish_step(self.env.timed_out | self.env.terminated) logger.toc("collection") logger.tic("learning") @@ -192,21 +196,47 @@ def learn(self): self.save() self.save() - def update_rewards(self, rewards_dict, terminated): - rewards_dict.update( + def update_rewards_dict(self, rewards_dict, step): + # sum existing rewards with new rewards + rewards_dict[step].update( self.get_rewards( - self.critic_cfg["reward"]["termination_weight"], mask=terminated - ) + self.critic_cfg["reward"]["termination_weight"], + modifier=self.env.dt, + mask=self.env.terminated, + ), + inplace=True, ) - rewards_dict.update( + rewards_dict[step].update( self.get_rewards( self.critic_cfg["reward"]["weights"], modifier=self.env.dt, - mask=~terminated, + mask=~self.env.terminated, + ), + inplace=True, + ) + + def initialize_rewards_dict(self, n_steps): + # sum existing rewards with new rewards + rewards_dict = TensorDict( + {}, batch_size=(n_steps, self.env.num_envs), device=self.device + ) + for key in self.critic_cfg["reward"]["termination_weight"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + for key in self.critic_cfg["reward"]["weights"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} ) + return rewards_dict + + def set_up_logger(self, dt=None): + if dt is None: + dt = self.env.dt + logger.initialize( + self.env.num_envs, dt, self.cfg["max_iterations"], self.device ) - def set_up_logger(self): logger.register_rewards(list(self.critic_cfg["reward"]["weights"].keys())) logger.register_rewards( list(self.critic_cfg["reward"]["termination_weight"].keys()) diff --git a/learning/runners/on_policy_runner.py b/learning/runners/on_policy_runner.py index 0a81680d..4831b615 100644 --- a/learning/runners/on_policy_runner.py +++ 
b/learning/runners/on_policy_runner.py @@ -14,17 +14,13 @@ class OnPolicyRunner(BaseRunner): def __init__(self, env, train_cfg, device="cpu"): super().__init__(env, train_cfg, device) - logger.initialize( - self.env.num_envs, - self.env.dt, - self.cfg["max_iterations"], - self.device, - ) def learn(self, states_to_log_dict=None): - self.set_up_logger() + n_policy_steps = int((1 / self.env.dt) / self.actor_cfg["frequency"]) + assert n_policy_steps > 0, "actor frequency should be less than ctrl_freq" + self.set_up_logger(dt=self.env.dt * n_policy_steps) - rewards_dict = {} + rewards_dict = self.initialize_rewards_dict(n_policy_steps) self.alg.switch_to_train() actor_obs = self.get_obs(self.actor_cfg["obs"]) @@ -86,37 +82,39 @@ def learn(self, states_to_log_dict=None): "critic_obs": critic_obs, } ) + for step in range(n_policy_steps): + self.env.step() + # put reward integration here + self.update_rewards_dict(rewards_dict, step) + else: + # catch and reset failed envs + to_be_reset = self.env.timed_out | self.env.terminated + env_ids = (to_be_reset).nonzero(as_tuple=False).flatten() + self.env._reset_idx(env_ids) - self.env.step() + total_rewards = torch.stack( + tuple(rewards_dict.sum(dim=0).values()) + ).sum(dim=(0)) actor_obs = self.get_noisy_obs( self.actor_cfg["obs"], self.actor_cfg["noise"] ) critic_obs = self.get_obs(self.critic_cfg["obs"]) - # * get time_outs - timed_out = self.get_timed_out() - terminated = self.get_terminated() - dones = timed_out | terminated - - self.update_rewards(rewards_dict, terminated) - total_rewards = torch.stack(tuple(rewards_dict.values())).sum(dim=0) - transition.update( { "next_actor_obs": actor_obs, "next_critic_obs": critic_obs, "rewards": total_rewards, - "timed_out": timed_out, - "terminated": terminated, - "dones": dones, + "timed_out": self.env.timed_out, + "dones": self.env.timed_out | self.env.terminated, } ) storage.add_transitions(transition) - logger.log_rewards(rewards_dict) + logger.log_rewards(rewards_dict.sum(dim=0)) logger.log_rewards({"total_rewards": total_rewards}) - logger.finish_step(dones) + logger.finish_step(self.env.timed_out | self.env.terminated) logger.toc("collection") logger.tic("learning") @@ -149,21 +147,47 @@ def burn_in_normalization(self, n_iterations=100): self.alg.critic.evaluate(critic_obs) self.env.reset() - def update_rewards(self, rewards_dict, terminated): - rewards_dict.update( + def update_rewards_dict(self, rewards_dict, step): + # sum existing rewards with new rewards + rewards_dict[step].update( self.get_rewards( - self.critic_cfg["reward"]["termination_weight"], mask=terminated - ) + self.critic_cfg["reward"]["termination_weight"], + modifier=self.env.dt, + mask=self.env.terminated, + ), + inplace=True, ) - rewards_dict.update( + rewards_dict[step].update( self.get_rewards( self.critic_cfg["reward"]["weights"], modifier=self.env.dt, - mask=~terminated, + mask=~self.env.terminated, + ), + inplace=True, + ) + + def initialize_rewards_dict(self, n_steps): + # sum existing rewards with new rewards + rewards_dict = TensorDict( + {}, batch_size=(n_steps, self.env.num_envs), device=self.device + ) + for key in self.critic_cfg["reward"]["termination_weight"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} ) + for key in self.critic_cfg["reward"]["weights"]: + rewards_dict.update( + {key: torch.zeros(n_steps, self.env.num_envs, device=self.device)} + ) + return rewards_dict + + def set_up_logger(self, dt=None): + if dt is None: + dt = self.env.dt + logger.initialize( + 
self.env.num_envs, dt, self.cfg["max_iterations"], self.device ) - def set_up_logger(self): logger.register_rewards(list(self.critic_cfg["reward"]["weights"].keys())) logger.register_rewards( list(self.critic_cfg["reward"]["termination_weight"].keys()) diff --git a/learning/utils/logger/PerIterationLogs.py b/learning/utils/logger/PerIterationLogs.py index 0f30b899..5d781efb 100644 --- a/learning/utils/logger/PerIterationLogs.py +++ b/learning/utils/logger/PerIterationLogs.py @@ -6,9 +6,9 @@ def __init__(self): def register_items(self, category, target, attribute_list): if category in self.targets.keys(): - assert ( - self.targets[category] == target - ), "Category already registered with different target" + assert self.targets[category] == target, ( + "Category already registered with different target" + ) else: self.targets[category] = target self.logs[category] = {} diff --git a/learning/utils/logger/test_logger.py b/learning/utils/logger/test_logger.py index eaef76f6..941a4f0b 100644 --- a/learning/utils/logger/test_logger.py +++ b/learning/utils/logger/test_logger.py @@ -10,9 +10,9 @@ def __init__(self): def all_rewards_registered(logger, reward_names): for key in reward_names: - assert ( - key in logger.reward_logs.log_items.keys() - ), "key not registered in logger." + assert key in logger.reward_logs.log_items.keys(), ( + "key not registered in logger." + ) def only_rewards_registered(logger, reward_names): @@ -27,13 +27,13 @@ def only_category_registered(logger, categories): def both_target_and_log_set_up(logger): for key in logger.iteration_logs.logs.keys(): - assert ( - key in logger.iteration_logs.targets.keys() - ), "target not registered in PerIteration." + assert key in logger.iteration_logs.targets.keys(), ( + "target not registered in PerIteration." + ) for key in logger.iteration_logs.targets.keys(): - assert ( - key in logger.iteration_logs.logs.keys() - ), "log not registered in PerIteration." + assert key in logger.iteration_logs.logs.keys(), ( + "log not registered in PerIteration." + ) def test_logger_setup(): @@ -71,16 +71,16 @@ def check_episode_count(logger, expected_count=1): def check_average_time(logger, expected_time): avg_time = logger.reward_logs.get_average_time() - assert ( - abs(avg_time.item() - expected_time) < 1e-5 - ), f"Average time {avg_time} is not close to {expected_time}" + assert abs(avg_time.item() - expected_time) < 1e-5, ( + f"Average time {avg_time} is not close to {expected_time}" + ) def check_average_reward(logger, reward_name, expected_average): avg_reward = logger.reward_logs.get_average_rewards()[reward_name] - assert ( - abs(avg_reward.item() - expected_average) < 1e-5 - ), f"Average reward {avg_reward} is not close to {expected_average}" + assert abs(avg_reward.item() - expected_average) < 1e-5, ( + f"Average reward {avg_reward} is not close to {expected_average}" + ) def test_logging_rewards(): @@ -173,8 +173,8 @@ def test_timer(): ETA2 = logger.estimate_ETA(["first_step"], mode="total") expected_ETA2 = a * (1000 - 1) - assert ( - abs(ETA2 - expected_ETA2) < 1e-5 - ), f"ETA {ETA2} is not close to {expected_ETA2}" + assert abs(ETA2 - expected_ETA2) < 1e-5, ( + f"ETA {ETA2} is not close to {expected_ETA2}" + ) assert (a + b) >= 2 * trial_time, "Timer not working correctly." 
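To make the reward bookkeeping shared by the runners above easier to follow, here is a minimal standalone sketch of the pattern: reward terms are discovered on the env by the `_reward_` name prefix, and each policy step integrates the weighted, dt-scaled terms over the simulation sub-steps into a per-term buffer before summing. It is illustrative only: `DummyEnv`, its two reward terms and the weights are placeholders, and a plain dict of tensors stands in for the `tensordict.TensorDict` that `initialize_rewards_dict` actually builds.

```python
import torch


class DummyEnv:
    """Stand-in for the gym envs; only the pieces used below are mimicked."""

    def __init__(self, num_envs, dt, device="cpu"):
        self.num_envs = num_envs
        self.dt = dt
        self.device = device
        self.terminated = torch.zeros(num_envs, dtype=torch.bool, device=device)

    # reward terms are discovered via the "_reward_" prefix, as in
    # BaseRunner.setup_reward_functions
    def _reward_upright(self):
        return torch.rand(self.num_envs, device=self.device)

    def _reward_action_rate(self):
        return -torch.rand(self.num_envs, device=self.device)


def setup_reward_functions(env):
    # reflect over the env and strip the "_reward_" prefix to get the term name
    return {
        name.replace("_reward_", ""): getattr(env, name)
        for name in dir(env)
        if name.startswith("_reward_") and callable(getattr(env, name))
    }


def integrate_rewards(env, reward_fns, weights, n_policy_steps):
    # one (n_policy_steps, num_envs) buffer per term, analogous to the
    # TensorDict built in initialize_rewards_dict
    buf = {
        name: torch.zeros(n_policy_steps, env.num_envs, device=env.device)
        for name in weights
    }
    for step in range(n_policy_steps):
        # env.step() would advance the simulation here; each sub-step's reward
        # is scaled by dt and masked to non-terminated envs, as the regular
        # weight terms are in update_rewards_dict
        for name, weight in weights.items():
            buf[name][step] = weight * env.dt * reward_fns[name]() * ~env.terminated
    # only the sum over the sub-step dimension is stored with the transition
    total = torch.stack([term.sum(dim=0) for term in buf.values()]).sum(dim=0)
    return buf, total


env = DummyEnv(num_envs=4, dt=0.005)
reward_fns = setup_reward_functions(env)
weights = {"upright": 1.0, "action_rate": 0.1}
_, total_rewards = integrate_rewards(env, reward_fns, weights, n_policy_steps=4)
print(total_rewards.shape)  # torch.Size([4])
```

The `termination_weight` terms (not shown) follow the same scheme with the opposite mask, and `logger.log_rewards(rewards_dict.sum(dim=0))` likewise reports the per-term sums over the sub-steps.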
diff --git a/resources/robots/mit_humanoid/friction_model_L.pt b/resources/robots/mit_humanoid/friction_model_L.pt new file mode 100644 index 00000000..d1d680d5 Binary files /dev/null and b/resources/robots/mit_humanoid/friction_model_L.pt differ
diff --git a/resources/robots/mit_humanoid/friction_model_R.pt b/resources/robots/mit_humanoid/friction_model_R.pt new file mode 100644 index 00000000..ce831fc8 Binary files /dev/null and b/resources/robots/mit_humanoid/friction_model_R.pt differ
diff --git a/resources/robots/mit_humanoid/urdf/humanoid_F_sf.urdf b/resources/robots/mit_humanoid/urdf/humanoid_F_sf.urdf index 51a84eee..f7f55186 100644 --- a/resources/robots/mit_humanoid/urdf/humanoid_F_sf.urdf +++ b/resources/robots/mit_humanoid/urdf/humanoid_F_sf.urdf [URDF markup stripped in extraction: eight hunks (@@ -562, -607, -660, -711, -790, -835, -888, -939 @@), all under the "Simple Foot: foot approximated as single box-contact" comment, each switching a joint to type="fixed"]
diff --git a/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf b/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf new file mode 100644 index 00000000..98a0773a --- /dev/null +++ b/resources/robots/mit_humanoid/urdf/humanoid_F_sf_learnt.urdf @@ -0,0 +1,996 @@ [996 added lines of URDF markup stripped in extraction]
diff --git a/tests/integration_tests/test_runner_integration.py b/tests/integration_tests/test_runner_integration.py index 611108e5..d847f627 100644 --- a/tests/integration_tests/test_runner_integration.py +++ b/tests/integration_tests/test_runner_integration.py @@ -58,9 +58,9 @@ def test_default_integration_settings(self, args): with torch.no_grad(): actions = runner.get_inference_actions() deployed_actions = runner.env.get_states(runner.actor_cfg["actions"]) - assert ( - torch.equal(actions, torch.zeros_like(actions)) is False - ), "Policy returning all zeros" + assert torch.equal(actions, torch.zeros_like(actions)) is False,
( + "Policy returning all zeros" + ) assert ( torch.equal(deployed_actions, torch.zeros_like(deployed_actions)) is False ), "Actions not written to environment" @@ -78,9 +78,9 @@ def test_default_integration_settings(self, args): model_7_path = os.path.join(runner.log_dir, "model_7.pt") model_8_path = os.path.join(runner.log_dir, "model_8.pt") - assert os.path.exists( - model_0_path - ), f"{model_0_path} (pre-iteration) was not saved" + assert os.path.exists(model_0_path), ( + f"{model_0_path} (pre-iteration) was not saved" + ) assert not os.path.exists(model_1_path), f"{model_1_path} was saved" assert not os.path.exists(model_2_path), f"{model_2_path} was saved" assert os.path.exists(model_3_path), f"{model_3_path} was not saved" @@ -88,9 +88,9 @@ def test_default_integration_settings(self, args): assert not os.path.exists(model_5_path), f"{model_5_path} was saved" assert os.path.exists(model_6_path), f"{model_6_path} was not saved" assert not os.path.exists(model_7_path), f"{model_7_path} was saved" - assert os.path.exists( - model_8_path - ), f"{model_5_path}(last iteration) was not saved" + assert os.path.exists(model_8_path), ( + f"{model_5_path}(last iteration) was not saved" + ) obs = torch.randn_like(runner.get_obs(runner.actor_cfg["obs"])) actions_first = runner.alg.actor.act_inference(obs).cpu().clone() diff --git a/tests/regression_tests/test_generated_torch_files.py b/tests/regression_tests/test_generated_torch_files.py index cf640ed3..55626034 100644 --- a/tests/regression_tests/test_generated_torch_files.py +++ b/tests/regression_tests/test_generated_torch_files.py @@ -27,6 +27,6 @@ def test_generated_tensor_matches_reference(tmp_path): _generate_candidate_tensor(candidate_path) reference = _load_tensor(REFERENCE_FILE) candidate = _load_tensor(candidate_path) - assert torch.equal( - reference, candidate - ), f"Tensors in {REFERENCE_FILE} and {candidate_path} differ." + assert torch.equal(reference, candidate), ( + f"Tensors in {REFERENCE_FILE} and {candidate_path} differ." + )