diff --git a/.gitignore b/.gitignore index ae1b171..1b46561 100644 --- a/.gitignore +++ b/.gitignore @@ -237,3 +237,4 @@ demo_data/demos25 demo_data/libero_spatial_no_noops_1.0.0_lerobot experiments/test +dev/ diff --git a/.gitmodules b/.gitmodules index c369b77..91d4717 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,9 @@ [submodule "experiments/7_franka/deoxys_control"] path = experiments/7_franka/deoxys_control url = https://github.com/UT-Austin-RPL/deoxys_control.git +[submodule "experiments/5_widowx/bridge_data_robot"] + path = experiments/5_widowx/bridge_data_robot + url = https://github.com/HaomingSong/bridge_data_robot.git +[submodule "experiments/5_widowx/edgeml"] + path = experiments/5_widowx/edgeml + url = https://github.com/youliangtan/edgeml.git diff --git a/experiments/5_widowx/README.md b/experiments/5_widowx/README.md new file mode 100644 index 0000000..2a4340a --- /dev/null +++ b/experiments/5_widowx/README.md @@ -0,0 +1,104 @@ +# WidowX 250s with EO-1 + +This directory contains the implementation for controlling WidowX 250s robots using the EO-1 model. The system enables real-time robot manipulation tasks through vision-language-action integration. + +## 🚀 Quick Start + +### Prerequisites + +**Hardware Requirements:** + +- WidowX 250s robot arm +- RealSense D435 camera (or compatible RGB camera) +- Compute options: + - Single GPU workstation (runs both ROS control and model inference) + - OR: NUC + GPU workstation (NUC for arm control, workstation for model inference) + +**Software Requirements:** + +- Ubuntu 20.04+ with CUDA support +- Python 3.10+ +- Docker (recommended for running the WidowX ROS control node on a workstation in single-machine mode) +- BridgeData WidowX controller stack properly configured + +Notes on architecture: + +- `Single-machine mode`: Run the WidowX ROS control node in Docker on the same GPU workstation used for EO-1 inference. +- `Dual-machine mode`: Use a NUC for robot control and a GPU workstation for model inference. For WidowX, the NUC does not require a real-time kernel in this setup. + +### Installation + +1. **Setup submodules:** + +```bash +git submodule update --init --recursive experiments/5_widowx/bridge_data_robot +git submodule update --init --recursive experiments/5_widowx/edgeml +``` + +2. **Configure robot control system:** + Follow the BridgeData WidowX controller setup in [bridge_data_robot](https://github.com/HaomingSong/bridge_data_robot?tab=readme-ov-file#setup) to configure your NUC/workstation for WidowX 250s control: + +3. **Install dependencies on workstation** + +```bash +# Create conda environment +conda create -n eo python=3.10 +conda activate eo + +# Install WidowX envs for workstation +pip install -e experiments/5_widowx/bridge_data_robot/widowx_envs +pip install -e experiments/5_widowx/edgeml + +# Install additional requirements +pip install -r experiments/5_widowx/requirements.txt +``` + +**Note**: In dual-machine mode, ensure the workstation can reach the control host (robot IP/port) over the network. In single-machine mode, ensure Docker has access to USB and camera devices. 
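+
+To confirm that the workstation can actually reach the control host before launching evaluation, a minimal connectivity check such as the Python sketch below can help (the host and port are assumptions matching the defaults used later in this README; adjust them to your setup):
+
+```python
+import socket
+
+# Assumed defaults; replace with your control host's IP/port.
+HOST, PORT = "10.6.8.122", 5556
+
+try:
+    with socket.create_connection((HOST, PORT), timeout=2.0):
+        print(f"Control host {HOST}:{PORT} is reachable.")
+except OSError as err:
+    print(f"Cannot reach {HOST}:{PORT}: {err}")
+```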
+ +## 🤖 Running Robot Control + +### Basic Usage + +```bash +python experiments/5_widowx/eval_widowx.py \ + --model-path "path/to/your/model" \ + --repo-id libero_spatial_no_noops_1.0.0_lerobot \ + --default-instruction "Put the eggplant in the basket" \ + --robot-ip 10.6.8.122 \ + --robot-port 5556 \ + --max-timesteps 120 +``` + +### Parameters + +| Parameter | Description | Default | +| ----------------------- | ----------------------------------------- | -------------------------------- | +| `--model-path` | Path to the trained EO-1 model checkpoint | Required | +| `--repo-id` | Dataset/repo ID for task specification | Required | +| `--default-instruction` | Default natural language instruction | "Put the eggplant in the basket" | +| `--roll-out-path` | Directory to save rollouts/videos | experiments/5_widowx/logs | +| `--max-timesteps` | Maximum number of control steps | 120 | +| `--im-size` | Image size for model input | 224 | +| `--action-horizon` | Receding-horizon (RHC) execution steps | 2 | +| `--blocking` | Use blocking control for step execution | False | +| `--robot-ip` | Robot/control host IP | 10.6.8.122 | +| `--robot-port` | Robot/control host port | 5556 | + +### Camera Configuration + +- Default color topic for RealSense D435 is `/D435/color/image_raw` (see `CAMERA_TOPICS` in `eval_widowx.py`). +- Mount and wire the D435 according to the hardware guide: [BridgeData V2 Hardware Setup](https://docs.google.com/document/d/1si-6cTElTWTgflwcZRPfgHU7-UwfCUkEztkH3ge5CGc/edit?tab=t.0). +- If your camera topic differs, update `CAMERA_TOPICS` or the controller configuration accordingly. + +## 🔒 Safety Considerations + +- Always ensure proper workspace setup and clear the workspace before operation. +- Monitor robot movements and be ready to use the emergency stop. +- Verify camera positioning and exposure for optimal visual coverage. + +## 📝 Notes + +- This setup uses a single external D435 stream by default; wrist camera is optional. +- Model performance depends on lighting, viewpoint, and calibration quality. +- Regular calibration of the robot and camera(s) is recommended. +- Rollouts and videos are saved under `--roll-out-path`. diff --git a/experiments/5_widowx/bridge_data_robot b/experiments/5_widowx/bridge_data_robot new file mode 160000 index 0000000..b841131 --- /dev/null +++ b/experiments/5_widowx/bridge_data_robot @@ -0,0 +1 @@ +Subproject commit b841131ecd512bafb303075bd8f8b677e0bf9f1f diff --git a/experiments/5_widowx/edgeml b/experiments/5_widowx/edgeml new file mode 160000 index 0000000..b4b8495 --- /dev/null +++ b/experiments/5_widowx/edgeml @@ -0,0 +1 @@ +Subproject commit b4b8495b489e7c973187742d2f2fe9aa016d9aca diff --git a/experiments/5_widowx/eval_widowx.py b/experiments/5_widowx/eval_widowx.py new file mode 100644 index 0000000..994221c --- /dev/null +++ b/experiments/5_widowx/eval_widowx.py @@ -0,0 +1,247 @@ +""" +This script shows how we evaluated a finetuned EO-1 on a real WidowX robot, which is adapted from https://github.com/octo-models/octo/blob/main/examples/04_eval_finetuned_on_robot.py. +While the exact specifics may not be applicable to your use case, this script serves as a didactic example of how to use EO-1 in a real-world setting. 
+ +If you wish, you may reproduce these results by [reproducing the robot setup](https://rail-berkeley.github.io/bridgedata/) +and installing [the robot controller](https://github.com/HaomingSong/bridge_data_robot.git) +""" + +import os + +os.environ["TOKENIZERS_PARALLELISM"] = "false" +import dataclasses +import pathlib +import time +from datetime import datetime + +import cv2 +import imageio +import numpy as np +import pandas as pd +import torch +import tqdm +import tyro +from PIL import Image +from transformers import AutoModel, AutoProcessor +from widowx_env import RHCWrapper, WidowXGym +from widowx_envs.widowx_env_service import WidowXConfigs + + +@dataclasses.dataclass +class Args: + ################################################################################################################# + # Model parameters + ################################################################################################################# + im_size: int = 224 + action_horizon: int = 2 + model_path: str = "" + repo_id: str = "" + + ################################################################################################################# + # WidowX environment-specific parameters + ################################################################################################################# + robot_ip: str = "10.6.8.122" # IP address of the robot + robot_port: int = 5556 # Port of the robot + initial_eep: tuple[float, float, float] = (0.3, 0.0, 0.25) # Initial position + # initial_eep: tuple[float, float, float] = (0.15, 0.0, 0.1) # Initial position + blocking: bool = False # Use the blocking controller + max_timesteps: int = 120 # Number of timesteps to run + default_instruction: str = "Put the eggplant in the basket" # Default instruction + + ################################################################################################################# + # Utils + ################################################################################################################# + show_image: bool = False # Show image + roll_out_path: pathlib.Path = pathlib.Path("experiments/5_widowx/logs") # Path to save videos + + +############################################################################## +STEP_DURATION_MESSAGE = """ +Bridge data was collected with non-blocking control and a step duration of 0.2s. +However, we relabel the actions to make it look like the data was collected with +blocking control and we evaluate with blocking control. +Be sure to use a step duration of 0.2 if evaluating with non-blocking control. 
+""" +STEP_DURATION = 0.2 +STICKY_GRIPPER_NUM_STEPS = 1 +WORKSPACE_BOUNDS = [[0.1, -0.15, -0.01, -1.57, 0], [0.45, 0.25, 0.25, 1.57, 0]] +CAMERA_TOPICS = [{"name": "/D435/color/image_raw"}] +ENV_PARAMS = { + "camera_topics": CAMERA_TOPICS, + "override_workspace_boundaries": WORKSPACE_BOUNDS, + "move_duration": STEP_DURATION, +} + +############################################################################## + + +def eval_bridge(args: Args) -> None: + curr_time = datetime.now().strftime("%Y_%m_%d_%H:%M:%S") + base_save_path = args.roll_out_path / pathlib.Path(args.default_instruction.replace(" ", "_")) / curr_time + + # set up the widowx client + start_state = np.concatenate([args.initial_eep, (0, 0, 0, 1)]) + env_params = WidowXConfigs.DefaultEnvParams.copy() + env_params.update(ENV_PARAMS) + env_params["start_state"] = list(start_state) + + env = WidowXGym( + env_params, + host=args.robot_ip, + port=args.robot_port, + im_size=args.im_size, + blocking=args.blocking, + sticky_gripper_num_steps=STICKY_GRIPPER_NUM_STEPS, + ) + if not args.blocking: + assert STEP_DURATION == 0.2, STEP_DURATION_MESSAGE + results_df = pd.DataFrame(columns=["success", "duration", "video_filename"]) + + model = ( + AutoModel.from_pretrained(args.model_path, dtype=torch.bfloat16, trust_remote_code=True).eval().cuda() + ) + + processor = AutoProcessor.from_pretrained(args.model_path, trust_remote_code=True) + + # switch TemporalEnsembleWrapper with RHCWrapper for receding horizon control + env = RHCWrapper(env, args.action_horizon) + + while True: + # reset env + obs, _ = env.reset() + time.sleep(2.0) + + if input(f"Use default instruction: {args.default_instruction}? (default y) [y/n]").lower() == "n": + instruction = input("Enter instruction: ") + else: + instruction = args.default_instruction + + # do rollout + images = [] + images.append(obs["full_image"]) + last_tstep = time.time() + bar = tqdm.tqdm( + range(args.max_timesteps), + position=0, + leave=True, + ncols=80, + desc="Rollout steps", + ) + + for t_step in bar: + try: + bar.set_description(f"Step {t_step}/{args.max_timesteps}") + if args.show_image: + cv2.imshow("img_view", obs["full_image"]) + cv2.waitKey(1) + + # prepare observation + # image = torch.from_numpy(obs["image_primary"] / 255).permute(2, 0, 1) + # [::-1, ::-1] + image = cv2.resize(obs["full_image"], (256, 256), interpolation=cv2.INTER_LINEAR) + # image = np.ascontiguousarray(obs["image_primary"]) + + # print("image",image.shape) + img = Image.fromarray(image) + batch = { + "observation.images.image": [img], + "observation.images.wrist_image": [img], + "observation.state": [obs["proprio"]], + "task": [str(instruction)], + "repo_id": [args.repo_id], + } + ov_out = processor.select_action(model, batch) + action_chunk = ov_out.action.squeeze(0).numpy() + + assert len(action_chunk) >= args.action_horizon, ( + f"We want to replan every {args.action_horizon} steps, but policy only predicts {len(action_chunk)} steps." 
+ ) + + # perform environment step + obs, _, _, truncated, infos = env.step(action_chunk) + + # recording history images + for history_obs in infos["observations"]: + image = history_obs["full_image"] + images.append(image) + if truncated: + break + + # match the step duration + elapsed_time = time.time() - last_tstep + if elapsed_time < STEP_DURATION: + time.sleep(STEP_DURATION - elapsed_time) + + except KeyboardInterrupt: + break + time.sleep(0.2) + + # logging rollouts + success: str | float | None = None + while not isinstance(success, float): + success = input( + "Did the rollout succeed? (enter y for 100%, n for 0%, a float value 0-1, or a numeric value 0-100 based on the evaluation spec)" + ) + try: + if success == "y": + success = 1.0 + elif success == "n": + success = 0.0 + else: + success = float(success) + except Exception: + success = 0.0 + + video_save_path = ( + base_save_path + / "videos" + / f"{datetime.now().strftime('%Y_%m_%d-%H_%M_%S')}_success_{success:.2f}.mp4" + ) + + if not (0 <= success <= 1): + print(f"Success must be a number in [0, 1] but got: {success}") + + results_df = pd.concat( + [ + results_df, + pd.DataFrame( + [ + { + "instruction": instruction, + "success": success, + "duration": t_step, + "video_filename": video_save_path, + "model_path": args.model_path, + "repo_id": args.repo_id, + } + ] + ), + ], + ignore_index=True, + ) + + # saving video + video = np.stack(images) + video_save_path.parent.mkdir(parents=True, exist_ok=True) + imageio.mimsave(video_save_path, video, fps=1.0 / STEP_DURATION * 3) + + if ( + input(f"Already eval {len(results_df)} rollouts. Do one more eval (default y)? [y/n]").lower() + == "n" + ): + break + + # save results + csv_filename = base_save_path / "results.csv" + results_df.to_csv(csv_filename, index=False) + print(f"Results saved to {csv_filename}") + # print avg + print(f"Avg success: {results_df['success'].mean()}") + + +if __name__ == "__main__": + import logging + + logging.basicConfig(level=logging.INFO) + args: Args = tyro.cli(Args) + eval_bridge(args) diff --git a/experiments/5_widowx/requirements.txt b/experiments/5_widowx/requirements.txt new file mode 100644 index 0000000..a4bcf7b --- /dev/null +++ b/experiments/5_widowx/requirements.txt @@ -0,0 +1,3 @@ +gym +funcsigs +numpy==1.24.3 diff --git a/experiments/5_widowx/widowx_env.py b/experiments/5_widowx/widowx_env.py new file mode 100644 index 0000000..de6998f --- /dev/null +++ b/experiments/5_widowx/widowx_env.py @@ -0,0 +1,164 @@ +import time + +import cv2 +import gym +import numpy as np +from widowx_envs.widowx_env_service import WidowXClient + + +def listdict2dictlist(ld): + return {k: [dic[k] for dic in ld] for k in ld[0]} + + +class RHCWrapper(gym.Wrapper): + """ + Performs receding horizon control. The policy returns `pred_horizon` actions and + we execute `exec_horizon` of them. 
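+
+    Example (illustrative sketch only; `env_params` and `policy.predict` are hypothetical
+    stand-ins for whatever produces an action chunk of shape (pred_horizon, action_dim)):
+
+        env = RHCWrapper(WidowXGym(env_params), exec_horizon=2)
+        obs, _ = env.reset()
+        action_chunk = policy.predict(obs)
+        obs, reward, done, trunc, infos = env.step(action_chunk)  # executes the first 2 actions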
+ """ + + def __init__(self, env: gym.Env, exec_horizon: int): + super().__init__(env) + self.exec_horizon = exec_horizon + + def step(self, actions): + if self.exec_horizon == 1 and len(actions.shape) == 1: + actions = actions[None] + assert len(actions) >= self.exec_horizon + rewards = [] + observations = [] + infos = [] + + for i in range(self.exec_horizon): + obs, reward, done, trunc, info = self.env.step(actions[i]) + observations.append(obs) + rewards.append(reward) + infos.append(info) + + if done or trunc: + break + + infos = listdict2dictlist(infos) + infos["rewards"] = rewards + infos["observations"] = observations + + return obs, np.sum(rewards), done, trunc, infos + + +def wait_for_obs(widowx_client): + obs = widowx_client.get_observation() + while obs is None: + print("Waiting for observations...") + obs = widowx_client.get_observation() + time.sleep(1) + return obs + + +def convert_obs(obs, im_size, *, flip=False): + # image_obs = cv2.resize(obs["image"], (im_size, im_size), interpolation=cv2.INTER_LINEAR) + image_obs = (obs["image"].reshape(3, im_size, im_size).transpose(1, 2, 0) * 255).astype(np.uint8) + full_image = obs["full_image"] + if flip: + image_obs = cv2.flip(image_obs, -1) + full_image = cv2.flip(full_image, -1) + # add padding to proprio to match training + proprio = np.concatenate([obs["state"][:6], [0], obs["state"][-1:]]) + + return { + "image_primary": image_obs, + "proprio": proprio, + "full_image": full_image, + } + + +def null_obs(img_size): + return { + "image_primary": np.zeros((img_size, img_size, 3), dtype=np.uint8), + "proprio": np.zeros((8,), dtype=np.float64), + "full_image": np.zeros((480, 640, 3), dtype=np.uint8), + } + + +class WidowXGym(gym.Env): + """ + A Gym environment for the WidowX controller provided by: + https://github.com/rail-berkeley/bridge_data_robot + Needed to use Gym wrappers. 
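+
+    Example (sketch; assumes a WidowX control server is reachable at the given host/port
+    and that `env_params` has been built as in eval_widowx.py):
+
+        env = WidowXGym(env_params, host="10.6.8.122", port=5556, blocking=False)
+        obs, _ = env.reset()          # obs keys: image_primary, proprio, full_image
+        obs, reward, done, truncated, info = env.step(np.zeros(7))  # 7-DoF action, last entry = gripper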
+ """ + + def __init__( + self, + env_params: dict, + host: str = "localhost", + port: int = 5556, + im_size: int = 256, + *, + blocking: bool = True, + sticky_gripper_num_steps: int = 1, + ): + self.widowx_client = WidowXClient(host, port) + self.widowx_client.init(env_params, image_size=im_size) + self.env_params = env_params + self.im_size = im_size + self.blocking = blocking + self.observation_space = gym.spaces.Dict( + { + "image_primary": gym.spaces.Box( + low=np.zeros((im_size, im_size, 3)), + high=255 * np.ones((im_size, im_size, 3)), + dtype=np.uint8, + ), + "proprio": gym.spaces.Box(low=np.ones((8,)) * -1, high=np.ones((8,)), dtype=np.float64), + } + ) + self.action_space = gym.spaces.Box(low=np.zeros((7,)), high=np.ones((7,)), dtype=np.float64) + self.sticky_gripper_num_steps = sticky_gripper_num_steps + self.is_gripper_closed = False + self.num_consecutive_gripper_change_actions = 0 + + def step(self, action): + # sticky gripper logic + if (action[-1] < 0.5) != self.is_gripper_closed: + self.num_consecutive_gripper_change_actions += 1 + else: + self.num_consecutive_gripper_change_actions = 0 + + if self.num_consecutive_gripper_change_actions >= self.sticky_gripper_num_steps: + self.is_gripper_closed = not self.is_gripper_closed + self.num_consecutive_gripper_change_actions = 0 + action[-1] = 0.0 if self.is_gripper_closed else 1.0 + + self.widowx_client.step_action(action, blocking=self.blocking) + + raw_obs = self.widowx_client.get_observation() + + truncated = False + if raw_obs is None: + # this indicates a loss of connection with the server + # due to an exception in the last step so end the trajectory + truncated = True + obs = null_obs(self.im_size) # obs with all zeros + else: + obs = convert_obs( + raw_obs, + self.im_size, + flip=self.env_params["camera_topics"][0]["name"] == "/D435/color/image_raw", + ) + + return obs, 0, False, truncated, {} + + def reset(self, seed=None, options=None): + super().reset(seed=seed) + self.widowx_client.reset() + + self.is_gripper_closed = False + self.num_consecutive_gripper_change_actions = 0 + + raw_obs = wait_for_obs(self.widowx_client) + + obs = convert_obs( + raw_obs, + self.im_size, + flip=self.env_params["camera_topics"][0]["name"] == "/D435/color/image_raw", + ) + + return obs, {}