This repository was archived by the owner on Nov 12, 2025. It is now read-only.
Merged
Changes from 3 commits
1 change: 1 addition & 0 deletions .gitignore
@@ -233,6 +233,7 @@ tools/caption_video.html
experiments/2_libero/logs
experiments/2_libero/logs/videos
demo_data/demos25
**/videos/**

demo_data/libero_spatial_no_noops_1.0.0_lerobot
experiments/test
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "experiments/7_franka/deoxys_control"]
path = experiments/7_franka/deoxys_control
url = https://github.com/UT-Austin-RPL/deoxys_control.git
96 changes: 96 additions & 0 deletions experiments/7_franka/README.md
@@ -0,0 +1,96 @@
# Franka Emika Panda Robot Control with EO-1

This directory contains the implementation for controlling a Franka Emika Panda robot with the EO-1 model. The system performs real-time manipulation tasks through vision-language-action integration.

## 🚀 Quick Start

### Prerequisites

**Hardware Requirements:**

- Franka Emika Panda robot arm
- RealSense cameras (or compatible RGB cameras)
- **NUC**: Configured with real-time kernel for robot control
- **Workstation**: Equipped with GPU for model inference

**Software Requirements:**

- Ubuntu 20.04+ with CUDA support
- Python 3.10+
- Real-time kernel configuration on NUC
- Deoxys control system properly configured

### Installation

1. **Set up the submodule:**

```bash
git submodule update --init --recursive experiments/7_franka/deoxys_control
```

2. **Configure robot control system:**
Follow the [Deoxys Documentation](https://zhuyifengzju.github.io/deoxys_docs/html/index.html) to configure your NUC and workstation for Franka control.

3. **Install dependencies on the workstation:**

```bash
# Create conda environment
conda create -n eo python=3.10
conda activate eo

# Install deoxys for workstation
pip install -e experiments/7_franka/deoxys_control/deoxys

# Install additional requirements
pip install -r experiments/7_franka/requirements.txt
```

**Note**: The NUC handles real-time robot control while the workstation runs the EO-1 model inference. Both systems must be properly configured according to the Deoxys documentation.
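
As a quick sanity check (assuming you installed a `PREEMPT_RT`-patched kernel on the NUC per the Deoxys docs), you can verify the running kernel:

```bash
# Run on the NUC: the kernel version string should mention PREEMPT_RT
uname -v
```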

## 🤖 Running Robot Control

### Basic Usage

```bash
python experiments/7_franka/eval_franka.py \
--model-path "path/to/your/model" \
--repo-id libero_spatial_no_noops_1.0.0_lerobot \
--task "Pick and place a cube" \
--video-out-path experiments/7_franka/videos \
--max-timesteps 300
```

### Parameters

| Parameter          | Description                                                    | Default                       |
| ------------------ | -------------------------------------------------------------- | ----------------------------- |
| `--model-path`     | Path to the trained EO-1 model checkpoint                      | `""` (required)               |
| `--repo-id`        | Dataset repository ID for task specification                   | `""` (required)               |
| `--task`           | Natural language description of the task                       | `""` (required)               |
| `--video-out-path` | Directory to save recorded videos                              | `experiments/7_franka/videos` |
| `--max-timesteps`  | Maximum number of control steps                                | `300`                         |
| `--resize-size`    | Image resize dimensions for model input                        | `224`                         |
| `--replan-steps`   | Receding-horizon control: actions executed per predicted chunk | `5`                           |
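
For example, to run a longer episode and execute 10 actions from each predicted chunk before replanning (paths are placeholders):

```bash
python experiments/7_franka/eval_franka.py \
    --model-path "path/to/your/model" \
    --repo-id libero_spatial_no_noops_1.0.0_lerobot \
    --task "Pick and place a cube" \
    --replan-steps 10 \
    --max-timesteps 600
```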

### Camera Configuration

The system supports multiple camera inputs. Update the camera serial numbers in `eval_franka.py`:

```python
# Camera serial numbers (update these with your actual camera IDs)
EGO_CAMERA = "213522070137" # Wrist camera
THIRD_CAMERA = "243222074139" # External camera
```
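
If you are unsure which serial number belongs to which camera, a minimal sketch using `pyrealsense2` (assumed to be installed alongside the RealSense driver; not part of this repo) can enumerate connected devices:

```python
import pyrealsense2 as rs

# Print the name and serial number of every connected RealSense camera
ctx = rs.context()
for dev in ctx.query_devices():
    print(dev.get_info(rs.camera_info.name), dev.get_info(rs.camera_info.serial_number))
```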

## 🔒 Safety Considerations

- Always ensure proper workspace setup before operation
- Monitor robot movements and be ready to use the emergency stop
- Verify camera positioning for optimal visual coverage

## 📝 Notes

- The system requires both wrist and external cameras for optimal performance
- Model performance depends on lighting conditions and camera positioning
- Regular calibration of the robot and cameras is recommended
- Check the video output directory for recorded demonstrations
1 change: 1 addition & 0 deletions experiments/7_franka/deoxys_control
Submodule deoxys_control added at 97396f
196 changes: 196 additions & 0 deletions experiments/7_franka/eval_franka.py
@@ -0,0 +1,196 @@
import os

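# Must be set before transformers is imported, to avoid tokenizer fork warnings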
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import collections
import copy
import dataclasses
import os.path as osp
import time
from datetime import datetime
from pathlib import Path

import cv2
import deoxys.utils.transform_utils as dft
import imageio
import numpy as np
import torch
import tqdm
import tyro
from deoxys import config_root
from deoxys.experimental.motion_utils import reset_joints_to
from deoxys.franka_interface import FrankaInterface
from deoxys.utils import YamlConfig
from PIL import Image
from realsense_camera import MultiCamera
from transformers import AutoModel, AutoProcessor

# Add your camera serial numbers here
EGO_CAMERA = "213522070137"
THIRD_CAMERA = "243222074139"

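# Joint-space home configuration (radians) the arm resets to before each episode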
reset_joint_positions = [
0.0760389047913384,
-1.0362613022620384,
-0.054254247684777324,
-2.383951857286591,
-0.004505598470154735,
1.3820559157131187,
0.784935455988679,
]


def save_rollout_video(rollout_images, save_dir):
    """Saves an MP4 replay of an episode."""
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)
    date_time = time.strftime("%Y_%m_%d-%H_%M_%S")
    mp4_path = save_dir / f"{date_time}.mp4"
    video_writer = imageio.get_writer(mp4_path, fps=5)
    for img in rollout_images:
        video_writer.append_data(img)
    video_writer.close()
    print(f"Saved rollout MP4 at path {mp4_path}")


@dataclasses.dataclass
class Args:
#################################################################################################################
# Model parameters
#################################################################################################################
resize_size: int = 224
replan_steps: int = 5
model_path: str = ""
repo_id: str = ""
task: str = ""

#################################################################################################################
# Utils
#################################################################################################################
video_out_path: Path = Path("experiments/7_franka/videos") # Path to save videos
max_timesteps: int = 300 # Number of timesteps to run


def convert_gripper_action(action):
    """Map the model's gripper command to the Deoxys convention: invert the
    channel and clamp values below 0.5 to -1 (fully close)."""
    action[-1] = 1 - action[-1]
    if action[-1] < 0.5:
        action[-1] = -1

    return action


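# Build the Deoxys Franka interface and the operational-space pose (OSC_POSE)
# controller configuration; both YAML files ship with Deoxys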
def get_robot_interface():
robot_interface = FrankaInterface(osp.join(config_root, "charmander.yml"))
controller_cfg = YamlConfig(osp.join(config_root, "osc-pose-controller.yml")).as_easydict()
controller_type = "OSC_POSE"

return robot_interface, controller_cfg, controller_type


def main(args: Args):
multi_camera = MultiCamera()
robot_interface, controller_cfg, controller_type = get_robot_interface()

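    # Load the EO-1 policy and its processor; run inference in bfloat16 on the GPU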
model = (
AutoModel.from_pretrained(args.model_path, dtype=torch.bfloat16, trust_remote_code=True).eval().cuda()
)

processor = AutoProcessor.from_pretrained(args.model_path, trust_remote_code=True)

while True:
action_plan = collections.deque()
input("Press Enter to start episode ...")
reset_joints_to(robot_interface, reset_joint_positions)

replay_images = []
bar = tqdm.tqdm(
range(args.max_timesteps),
position=0,
leave=True,
ncols=80,
desc="Rollout steps",
)

for _ in bar:
try:
images = multi_camera.get_frame()
last_state = robot_interface._state_buffer[-1]
last_gripper_state = robot_interface._gripper_state_buffer[-1]
frame, _ = images[THIRD_CAMERA]
ego_frame, _ = images[EGO_CAMERA]

                if not action_plan:
                    # Convert camera frames from BGR to RGB once, and keep a copy of
                    # the third-person view for the replay video
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    ego_frame_rgb = cv2.cvtColor(ego_frame, cv2.COLOR_BGR2RGB)
                    replay_images.append(frame_rgb.copy())

                    # O_T_EE is stored column-major, hence the transpose after reshape
                    eef_pose = np.asarray(last_state.O_T_EE, dtype=np.float32).reshape(4, 4).T
                    # End-effector state: translation (xyz) followed by Euler angles
                    eef_state = np.concatenate(
                        (eef_pose[:3, -1], dft.mat2euler(eef_pose[:3, :3])),
                        axis=-1,
                    )
                    gripper_state = np.array([last_gripper_state.width])
                    state_data = np.concatenate([eef_state.flatten(), np.array([0]), gripper_state])

frame_resized = cv2.resize(frame_rgb, (args.resize_size, args.resize_size))
ego_frame_resized = cv2.resize(ego_frame_rgb, (args.resize_size, args.resize_size))

ego_frame = Image.fromarray(ego_frame_resized)
frame = Image.fromarray(frame_resized)

# NOTE: Change the keys to match your model
batch = {
"observation.images.image": [frame],
"observation.images.wrist_image": [ego_frame],
"observation.state": [state_data],
"task": [args.task],
"repo_id": [args.repo_id],
}
ov_out = processor.select_action(
model,
batch,
)
action_chunk = ov_out.action[0].numpy()
assert len(action_chunk) >= args.replan_steps, (
f"We want to replan every {args.replan_steps} steps, but policy only predicts {len(action_chunk)} steps."
)
action_plan.extend(action_chunk[: args.replan_steps])

                action = action_plan.popleft()
                # Convert the predicted Euler rotation to the axis-angle form
                # expected by the OSC_POSE controller
                rotation_matrix = dft.euler2mat(action[3:6])
                axis_angle = dft.quat2axisangle(dft.mat2quat(rotation_matrix))
                action[3:6] = axis_angle
                action = convert_gripper_action(action)

robot_interface.control(
controller_type=controller_type, action=action, controller_cfg=controller_cfg
)

except KeyboardInterrupt:
break

        # Save an MP4 replay of the episode
video_save_path = args.video_out_path / args.task.replace(" ", "_")
video_save_path.mkdir(parents=True, exist_ok=True)
curr_time = datetime.now().strftime("%Y_%m_%d_%H:%M:%S")
save_path = video_save_path / f"{curr_time}.mp4"
video = np.stack(replay_images)
imageio.mimsave(save_path, video, fps=20)

if input("Do one more eval (default y)? [y/n]").lower() == "n":
break


if __name__ == "__main__":
args = tyro.cli(Args)
main(args)