diff --git a/README.md b/README.md
index 7903a3a..251d73e 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,12 @@ For example, to play the CarryBox task:
 python legged_gym/scripts/play.py --task carrybox --resume_path resources/ckpt/carrybox.pt
 ```
+### Sim2Sim (MuJoCo)
+This is only for sim2sim testing of the CarryBox task.
+```bash
+python legged_gym/deploy/deploy_mujoco/deploy_mujoco.py g1.yaml
+```
+
 
 > ⚠️ Note:
 >
 > During the first 1–2 episodes of `play.py`, you may observe slight interpenetration between the robot, the object, or the platform.
diff --git a/legged_gym/deploy/deploy_mujoco/configs/g1.yaml b/legged_gym/deploy/deploy_mujoco/configs/g1.yaml
new file mode 100644
index 0000000..60fe07d
--- /dev/null
+++ b/legged_gym/deploy/deploy_mujoco/configs/g1.yaml
@@ -0,0 +1,41 @@
+policy_path: "{LEGGED_GYM_ROOT_DIR}/deploy/pre_train/g1/carrybox.pt"
+xml_path: "{LEGGED_GYM_ROOT_DIR}/resources/robots/g1/g1_29dof.xml"
+
+# Simulation settings
+simulation_duration: 10.0
+simulation_dt: 0.002
+control_decimation: 10
+
+# PD gains (29 DOF)
+kps: [150, 150, 150, 300, 40, 40,       # left leg
+      150, 150, 150, 300, 40, 40,       # right leg
+      300, 300, 300,                    # waist
+      200, 200, 200, 100, 20, 20, 20,   # left arm
+      200, 200, 200, 100, 20, 20, 20]   # right arm
+
+kds: [2, 2, 2, 4, 1, 1,                 # left leg
+      2, 2, 2, 4, 1, 1,                 # right leg
+      4, 4, 4,                          # waist
+      3, 3, 3, 1, 0.5, 0.5, 0.5,        # left arm
+      3, 3, 3, 1, 0.5, 0.5, 0.5]        # right arm
+
+# Default joint angles (rad)
+default_angles: [-0.1, 0.0, 0.0, 0.3, -0.2, 0.0,      # left leg
+                 -0.1, 0.0, 0.0, 0.3, -0.2, 0.0,      # right leg
+                 0.0, 0.0, 0.0,                       # waist
+                 0.0, 0.1, 0.0, 1.2, 0.0, 0.0, 0.0,   # left arm
+                 0.0, -0.1, 0.0, 1.2, 0.0, 0.0, 0.0]  # right arm
+
+# Observation and action scaling
+ang_vel_scale: 0.25
+dof_pos_scale: 1.0
+dof_vel_scale: 0.05
+action_scale: 0.25
+
+# Dimensions
+num_actions: 29
+num_obs: 123
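+# (For reference, the 123 per-step entries follow the concatenation order in
+#  deploy_mujoco.py's update_observation: 3 base ang vel + 3 projected gravity
+#  + 29 dof pos + 29 dof vel + 15 end-effector pos + 29 last action
+#  + 3 box pos + 6 box rot (6D) + 3 box size + 3 goal pos = 123.
+#  The policy input is num_obs * 6 stacked history frames = 738 values.)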
+
+# Clipping
+clip_observations: 100.0
+clip_actions: 100.0
\ No newline at end of file
diff --git a/legged_gym/deploy/deploy_mujoco/deploy_mujoco.py b/legged_gym/deploy/deploy_mujoco/deploy_mujoco.py
new file mode 100644
index 0000000..4c43ea9
--- /dev/null
+++ b/legged_gym/deploy/deploy_mujoco/deploy_mujoco.py
@@ -0,0 +1,391 @@
+import argparse
+import os
+import time
+from collections import deque
+
+import mujoco
+import mujoco.viewer
+import numpy as np
+import torch
+import yaml
+
+from legged_gym import LEGGED_GYM_ROOT_DIR
+
+
+# =============================================================================
+# Utility Functions
+# =============================================================================
+
+def get_gravity_orientation(quaternion: np.ndarray) -> np.ndarray:
+    """Extract gravity orientation from quaternion [w, x, y, z]."""
+    qw, qx, qy, qz = quaternion[0], quaternion[1], quaternion[2], quaternion[3]
+    return np.array([
+        2 * (-qz * qx + qw * qy),
+        -2 * (qz * qy + qw * qx),
+        1 - 2 * (qw * qw + qz * qz)
+    ])
+
+
+def pd_control(
+    target_q: np.ndarray,
+    q: np.ndarray,
+    kp: np.ndarray,
+    target_dq: np.ndarray,
+    dq: np.ndarray,
+    kd: np.ndarray
+) -> np.ndarray:
+    """Proportional-Derivative control: tau = Kp * (q* - q) + Kd * (dq* - dq)."""
+    return (target_q - q) * kp + (target_dq - dq) * kd
+
+
+def quat_rotate_inverse(q: np.ndarray, v: np.ndarray) -> np.ndarray:
+    """Rotate vector from world to body frame using quaternion [w, x, y, z]."""
+    q = np.asarray(q, dtype=np.float32).reshape(-1, 4)
+    v = np.asarray(v, dtype=np.float32).reshape(-1, 3)
+
+    q_w = q[:, 0]
+    q_vec = q[:, 1:4]
+
+    a = v * (2.0 * q_w ** 2 - 1.0)[:, np.newaxis]
+    b = np.cross(q_vec, v, axis=-1) * q_w[:, np.newaxis] * 2.0
+    c = q_vec * np.sum(q_vec * v, axis=1)[:, np.newaxis] * 2.0
+
+    return (a - b + c).squeeze()
+
+
+def quat_conjugate_np(a: np.ndarray) -> np.ndarray:
+    """Compute quaternion conjugate for [x, y, z, w] format."""
+    shape = a.shape
+    a = a.reshape(-1, 4)
+    result = np.concatenate((-a[:, :3], a[:, -1:]), axis=-1)
+    return result.reshape(shape)
+
+
+def quat_mul_np(a: np.ndarray, b: np.ndarray) -> np.ndarray:
+    """Quaternion multiplication for [x, y, z, w] format."""
+    assert a.shape == b.shape
+    shape = a.shape
+    a = a.reshape(-1, 4)
+    b = b.reshape(-1, 4)
+
+    x1, y1, z1, w1 = a[:, 0], a[:, 1], a[:, 2], a[:, 3]
+    x2, y2, z2, w2 = b[:, 0], b[:, 1], b[:, 2], b[:, 3]
+
+    ww = (z1 + x1) * (x2 + y2)
+    yy = (w1 - y1) * (w2 + z2)
+    zz = (w1 + y1) * (w2 - z2)
+    xx = ww + yy + zz
+    qq = 0.5 * (xx + (z1 - x1) * (x2 - y2))
+    w = qq - ww + (z1 - y1) * (y2 - z2)
+    x = qq - xx + (x1 + w1) * (x2 + w2)
+    y = qq - yy + (w1 - x1) * (y2 + z2)
+    z = qq - zz + (z1 + y1) * (w2 - x2)
+
+    return np.stack([x, y, z, w], axis=-1).reshape(shape)
+
+
+def quat_rotate_np(q: np.ndarray, v: np.ndarray) -> np.ndarray:
+    """Rotate vector using quaternion [x, y, z, w]."""
+    v_shape = v.shape
+    q = q.reshape(-1, 4)
+    v = v.reshape(-1, 3)
+
+    q_w = q[:, 3]
+    q_vec = q[:, :3]
+
+    a = v * (2.0 * q_w ** 2 - 1.0)[:, np.newaxis]
+    b = 2.0 * q_w[:, np.newaxis] * np.cross(q_vec, v, axis=-1)
+    c = 2.0 * q_vec * np.sum(q_vec * v, axis=1)[:, np.newaxis]
+
+    return (a + b + c).reshape(v_shape)
+
+
+def quat_to_tan_norm_np(q: np.ndarray) -> np.ndarray:
+    """Convert quaternion to 6D rotation: rotated x and z axes [x, y, z, w]."""
+    ref_tan = np.zeros_like(q[..., :3])
+    ref_tan[..., 0] = 1.0
+    tan = quat_rotate_np(q, ref_tan)
+
+    ref_norm = np.zeros_like(q[..., :3])
+    ref_norm[..., -1] = 1.0
+    norm = quat_rotate_np(q, ref_norm)
+
+    return np.concatenate([tan, norm], axis=-1)
+
+
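+# Note on conventions: MuJoCo stores quaternions (qpos[3:7], xquat) as [w, x, y, z],
+# which is what get_gravity_orientation and quat_rotate_inverse expect, while the
+# quat_*_np helpers above use the IsaacGym-style [x, y, z, w] layout; quantities are
+# reordered accordingly in update_observation before those helpers are called.
+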
FileNotFoundError(f"Config file not found at: {config_path}") + + with open(config_path, "r") as f: + self.config = yaml.load(f, Loader=yaml.FullLoader) + + # Replace path macros + self.config["policy_path"] = self.config["policy_path"].replace( + "{LEGGED_GYM_ROOT_DIR}", LEGGED_GYM_ROOT_DIR + ) + self.config["xml_path"] = self.config["xml_path"].replace( + "{LEGGED_GYM_ROOT_DIR}", LEGGED_GYM_ROOT_DIR + ) + + # Convert array parameters to numpy arrays + self.kps = np.array(self.config["kps"], dtype=np.float32) + self.kds = np.array(self.config["kds"], dtype=np.float32) + self.default_angles = np.array(self.config["default_angles"], dtype=np.float32) + + # Scalar parameters + self.ang_vel_scale = float(self.config["ang_vel_scale"]) + self.dof_pos_scale = float(self.config["dof_pos_scale"]) + self.dof_vel_scale = float(self.config["dof_vel_scale"]) + self.action_scale = float(self.config["action_scale"]) + self.clip_observations = float(self.config["clip_observations"]) + self.clip_actions = float(self.config["clip_actions"]) + + def init_mujoco(self) -> None: + """Load MuJoCo model, initialize data, and set timestep.""" + xml_path = self.config["xml_path"] + self.m = mujoco.MjModel.from_xml_path(xml_path) + self.d = mujoco.MjData(self.m) + self.m.opt.timestep = self.config["simulation_dt"] + + def load_policy(self) -> None: + """Load PyTorch JIT policy model.""" + policy_path = self.config["policy_path"] + self.policy = torch.jit.load(policy_path) + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.policy.to(self.device) + + def init_runtime_buffers(self) -> None: + """Initialize action, observation, and history buffers.""" + num_actions = self.config["num_actions"] + num_obs = self.config["num_obs"] + + self.actions = np.zeros(num_actions, dtype=np.float32) + self.target_dof_pos = self.default_angles.copy() + + self.obs = np.zeros(num_obs * self.history_length, dtype=np.float32) + for _ in range(self.history_length): + self.obs_buffer.append(np.zeros(num_obs, dtype=np.float32)) + + def init_body_ids(self) -> None: + """Get body and geom IDs from MuJoCo model by name.""" + assert self.m is not None + + self.hand1_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "left_palm_link") + self.hand2_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "right_palm_link") + self.foot1_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "left_ankle_pitch_link") + self.foot2_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "right_ankle_pitch_link") + self.head_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "mid360_link") + + self.box_body_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "box") + self.box_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_GEOM, "box_geom") + self.box_goal = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "box_goal") + + # -------------------------------------------------------------------------- + # Observation Construction + # -------------------------------------------------------------------------- + + def update_observation(self) -> None: + """Construct current observation vector with history stacking.""" + # Base state + base_pos = self.d.qpos[:3].copy() + base_quat = self.d.qpos[3:7].copy() + base_ang_vel_body = self.d.qvel[3:6].copy() + gravity_orientation = get_gravity_orientation(base_quat) + + # Joint states + joint_angles = self.d.qpos[7:36].copy() + joint_velocities = self.d.qvel[6:35].copy() + + # End-effector positions (body frame) + ee_ids = [self.hand1_id, self.hand2_id, 
+
+    def init_body_ids(self) -> None:
+        """Get body and geom IDs from MuJoCo model by name."""
+        assert self.m is not None
+
+        self.hand1_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "left_palm_link")
+        self.hand2_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "right_palm_link")
+        self.foot1_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "left_ankle_pitch_link")
+        self.foot2_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "right_ankle_pitch_link")
+        self.head_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "mid360_link")
+
+        self.box_body_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "box")
+        self.box_id = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_GEOM, "box_geom")
+        self.box_goal = mujoco.mj_name2id(self.m, mujoco.mjtObj.mjOBJ_BODY, "box_goal")
+
+    # --------------------------------------------------------------------------
+    # Observation Construction
+    # --------------------------------------------------------------------------
+
+    def update_observation(self) -> None:
+        """Construct current observation vector with history stacking."""
+        # Base state
+        base_pos = self.d.qpos[:3].copy()
+        base_quat = self.d.qpos[3:7].copy()
+        base_ang_vel_body = self.d.qvel[3:6].copy()
+        gravity_orientation = get_gravity_orientation(base_quat)
+
+        # Joint states
+        joint_angles = self.d.qpos[7:36].copy()
+        joint_velocities = self.d.qvel[6:35].copy()
+
+        # End-effector positions (body frame)
+        ee_ids = [self.hand1_id, self.hand2_id, self.foot1_id, self.foot2_id, self.head_id]
+        pos_ee_world = np.array([self.d.xpos[ee_id] for ee_id in ee_ids], dtype=np.float32)
+        vec_ee_world = pos_ee_world - base_pos
+        vec_ee_local = quat_rotate_inverse(base_quat, vec_ee_world)
+        end_effector_pos_obs = vec_ee_local.flatten()
+
+        # Last action
+        action_last = self.actions.copy()
+
+        # Box task observations
+        box_pos_world = self.d.body(self.box_body_id).xpos.copy()
+        box_quat_world = self.d.xquat[self.box_body_id].copy()
+        box_pos_body = quat_rotate_inverse(base_quat, box_pos_world - base_pos)
+
+        # Convert to [x, y, z, w] for custom quaternion functions
+        base_quat_xyzw = np.concatenate([base_quat[1:], base_quat[0:1]])
+        box_quat_world_xyzw = np.concatenate([box_quat_world[1:], box_quat_world[0:1]])
+        robot_quat_conj = quat_conjugate_np(base_quat_xyzw)
+        box_quat_local = quat_mul_np(robot_quat_conj, box_quat_world_xyzw)
+        box_rot_6d_local = quat_to_tan_norm_np(box_quat_local)
+
+        box_size = self.m.geom_size[self.box_id].copy() * 2.0
+        box_goal_pos_world = self.d.body(self.box_goal).xpos.copy()
+        box_goal_pos_body = quat_rotate_inverse(base_quat, box_goal_pos_world - base_pos)
+
+        # Concatenate current observation
+        current_obs = np.concatenate(
+            (
+                base_ang_vel_body * self.ang_vel_scale,
+                gravity_orientation,
+                (joint_angles - self.default_angles) * self.dof_pos_scale,
+                joint_velocities * self.dof_vel_scale,
+                end_effector_pos_obs,
+                action_last,
+                box_pos_body,
+                box_rot_6d_local,
+                box_size,
+                box_goal_pos_body,
+            ),
+            axis=-1,
+        )
+
+        # Clip observation
+        clipped_obs = np.clip(current_obs, -self.clip_observations, self.clip_observations)
+
+        # Update history buffer and stack
+        self.obs_buffer.append(clipped_obs)
+        self.obs[:] = np.concatenate(list(self.obs_buffer), axis=-1)
+
+    # --------------------------------------------------------------------------
+    # Main Deployment Loop
+    # --------------------------------------------------------------------------
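+
+    # With simulation_dt = 0.002 s and control_decimation = 10, physics and PD
+    # torque updates run at 500 Hz while the policy (and thus the joint position
+    # targets) is refreshed at 50 Hz.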
+
+    def deploy_loop(self) -> None:
+        """Run MuJoCo simulation and policy control loop."""
+        control_decimation = self.config["control_decimation"]
+        simulation_duration = self.config["simulation_duration"]
+
+        assert self.m is not None and self.d is not None and self.policy is not None
+
+        with mujoco.viewer.launch_passive(self.m, self.d) as viewer:
+            start_time = time.time()
+
+            while viewer.is_running() and (time.time() - start_time) < simulation_duration:
+                step_start = time.time()
+
+                # PD control
+                tau = pd_control(
+                    self.target_dof_pos,
+                    self.d.qpos[7:36],
+                    self.kps,
+                    np.zeros_like(self.kds),
+                    self.d.qvel[6:35],
+                    self.kds,
+                )
+                self.d.ctrl[:] = tau
+
+                # MuJoCo simulation step
+                mujoco.mj_step(self.m, self.d)
+
+                # Policy control (update at decimation rate)
+                if self.counter % control_decimation == 0:
+                    self.update_observation()
+
+                    # Policy inference
+                    obs_tensor = torch.from_numpy(self.obs).unsqueeze(0).to(self.device)
+                    with torch.no_grad():
+                        self.actions = self.policy(obs_tensor).cpu().detach().numpy().squeeze()
+                    self.action_clipped = np.clip(
+                        self.actions, -self.clip_actions, self.clip_actions
+                    )
+
+                    # Map action to target joint positions
+                    self.target_dof_pos = (
+                        self.action_clipped * self.action_scale + self.default_angles
+                    )
+
+                # Sync viewer
+                viewer.sync()
+                self.counter += 1
+
+                # Maintain real-time
+                time_until_next_step = self.m.opt.timestep - (time.time() - step_start)
+                if time_until_next_step > 0:
+                    time.sleep(time_until_next_step)
+
+        print("Simulation finished.")
+
+
+# =============================================================================
+# Script Entry Point
+# =============================================================================
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Deploy a PyTorch JIT policy in MuJoCo."
+    )
+    parser.add_argument(
+        "config_file",
+        type=str,
+        help="Config file name in deploy/deploy_mujoco/configs/",
+    )
+    cli_args = parser.parse_args()
+
+    try:
+        deployer = MujocoPolicyDeployer(cli_args.config_file)
+        deployer.deploy_loop()
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        print("Please ensure the config file name is correct and all paths inside the config are valid.")
+    except RuntimeError as e:
+        print(f"Policy loading error (JIT/Torch): {e}")
+        print("Please re-export the policy using the play.py script to generate a valid .pt file.")
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
diff --git a/legged_gym/deploy/pre_train/g1/carrybox.pt b/legged_gym/deploy/pre_train/g1/carrybox.pt
new file mode 100644
index 0000000..66e2374
Binary files /dev/null and b/legged_gym/deploy/pre_train/g1/carrybox.pt differ
diff --git a/legged_gym/resources/robots/g1/g1_29dof.xml b/legged_gym/resources/robots/g1/g1_29dof.xml
new file mode 100644
index 0000000..41b87d9
--- /dev/null
+++ b/legged_gym/resources/robots/g1/g1_29dof.xml
@@ -0,0 +1,340 @@
+[MJCF scene description for the 29-DOF G1 plus the box and box_goal bodies; the 340 lines of XML markup are not preserved in this view]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 0321e14..ef3f1c0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,4 +10,5 @@ scipy
 setuptools
 tqdm
 wandb
-tensorboard
\ No newline at end of file
+tensorboard
+mujoco
\ No newline at end of file
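
A quick way to separate checkpoint problems from simulation problems is to load the exported JIT policy on its own before launching the viewer. The snippet below is only a sketch: the file name `sanity_check_policy.py`, the hard-coded path, and the dimensions are assumptions taken from `g1.yaml` above, so adjust them to your checkout.

```python
# sanity_check_policy.py -- hypothetical helper, not part of the diff above
import torch

POLICY_PATH = "legged_gym/deploy/pre_train/g1/carrybox.pt"  # assumed relative to the repo root
NUM_OBS, HISTORY_LENGTH, NUM_ACTIONS = 123, 6, 29           # must match g1.yaml

policy = torch.jit.load(POLICY_PATH, map_location="cpu")

with torch.no_grad():
    # The deploy script feeds a (1, 123 * 6) stacked-history observation.
    dummy_obs = torch.zeros(1, NUM_OBS * HISTORY_LENGTH)
    actions = policy(dummy_obs)

assert actions.shape[-1] == NUM_ACTIONS, f"unexpected action shape: {tuple(actions.shape)}"
print("policy loads and runs, action shape:", tuple(actions.shape))
```

If this passes but `deploy_mujoco.py` still fails, the problem is more likely the XML path or the body/geom names queried in `init_body_ids` than the checkpoint itself.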