diff --git a/.gitignore b/.gitignore
index 47a7942..ae1b171 100644
--- a/.gitignore
+++ b/.gitignore
@@ -233,6 +233,7 @@
 tools/caption_video.html
 experiments/2_libero/logs
 experiments/2_libero/logs/videos
 demo_data/demos25
+**/videos/**
 demo_data/libero_spatial_no_noops_1.0.0_lerobot
 experiments/test
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..c369b77
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "experiments/7_franka/deoxys_control"]
+	path = experiments/7_franka/deoxys_control
+	url = https://github.com/UT-Austin-RPL/deoxys_control.git
diff --git a/experiments/7_franka/README.md b/experiments/7_franka/README.md
new file mode 100644
index 0000000..4533e9f
--- /dev/null
+++ b/experiments/7_franka/README.md
@@ -0,0 +1,96 @@
+# Franka Emika Panda Robot Control with EO-1
+
+This directory contains the implementation for controlling Franka Emika Panda robots using the EO-1 model. The system enables real-time robot manipulation tasks through vision-language-action integration.
+
+## 🚀 Quick Start
+
+### Prerequisites
+
+**Hardware Requirements:**
+
+- Franka Emika Panda robot arm
+- RealSense cameras (or compatible RGB cameras)
+- **NUC**: Configured with a real-time kernel for robot control
+- **Workstation**: Equipped with a GPU for model inference
+
+**Software Requirements:**
+
+- Ubuntu 20.04+ with CUDA support
+- Python 3.10+
+- Real-time kernel configured on the NUC
+- Deoxys control system properly configured
+
+### Installation
+
+1. **Set up submodules:**
+
+```bash
+git submodule update --init --recursive experiments/7_franka/deoxys_control
+```
+
+2. **Configure the robot control system:**
+   Follow the [Deoxys Documentation](https://zhuyifengzju.github.io/deoxys_docs/html/index.html) to configure your NUC and workstation for Franka control.
+
+3. **Install dependencies on the workstation:**
+
+```bash
+# Create conda environment
+conda create -n eo python=3.10
+conda activate eo
+
+# Install deoxys for the workstation
+pip install -e experiments/7_franka/deoxys_control/deoxys
+
+# Install additional requirements
+pip install -r experiments/7_franka/requirements.txt
+```
+
+**Note**: The NUC handles real-time robot control while the workstation runs the EO-1 model inference. Both systems must be properly configured according to the Deoxys documentation.
+
+## 🤖 Running Robot Control
+
+### Basic Usage
+
+```bash
+python experiments/7_franka/eval_franka.py \
+    --model-path "path/to/your/model" \
+    --repo-id libero_spatial_no_noops_1.0.0_lerobot \
+    --task "Pick and place a cube" \
+    --video-out-path experiments/7_franka/videos \
+    --max-timesteps 300
+```
+
+### Parameters
+
+| Parameter          | Description                                                                               | Default                                 |
+| ------------------ | ----------------------------------------------------------------------------------------- | --------------------------------------- |
+| `--model-path`     | Path to the trained EO-1 model checkpoint                                                  | Required                                |
+| `--repo-id`        | Dataset repository ID for task specification                                               | `libero_spatial_no_noops_1.0.0_lerobot` |
+| `--task`           | Natural language description of the task                                                   | `"Pick and place a cube"`               |
+| `--video-out-path` | Directory to save recorded videos                                                          | `experiments/7_franka/videos`           |
+| `--max-timesteps`  | Maximum number of control steps                                                            | `300`                                   |
+| `--resize-size`    | Image resize dimensions for model input                                                    | `224`                                   |
+| `--replan-steps`   | Number of actions executed from each predicted chunk before re-planning (receding-horizon control; see the sketch below) | `5`                                     |
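+
+The `--replan-steps` value controls how many actions from each predicted chunk are executed before the policy is queried again. Below is a minimal, self-contained sketch of this receding-horizon loop as used by `eval_franka.py`; the model call and the robot command are replaced with placeholder stubs here:
+
+```python
+import collections
+
+import numpy as np
+
+REPLAN_STEPS = 5  # corresponds to --replan-steps
+
+
+def predict_action_chunk(observation):
+    # Placeholder for model inference; the real script builds a batch of camera
+    # images and robot state and calls processor.select_action(model, batch).
+    return np.zeros((8, 7))
+
+
+action_plan = collections.deque()
+for step in range(300):  # corresponds to --max-timesteps
+    observation = None  # placeholder for camera frames + end-effector state
+    if not action_plan:
+        # Re-plan only when the queue is empty, keeping the first REPLAN_STEPS actions.
+        chunk = predict_action_chunk(observation)
+        action_plan.extend(chunk[:REPLAN_STEPS])
+    action = action_plan.popleft()
+    # The real script sends `action` to the robot via robot_interface.control(...).
+```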
+
+### Camera Configuration
+
+The system supports multiple camera inputs. Update the camera serial numbers in `eval_franka.py`:
+
+```python
+# Camera serial numbers (update these with your actual camera IDs)
+EGO_CAMERA = "213522070137"  # Wrist camera
+THIRD_CAMERA = "243222074139"  # External camera
+```
+
+## 🔒 Safety Considerations
+
+- Always ensure proper workspace setup before operation
+- Monitor robot movements and be ready to use emergency stop
+- Verify camera positioning for optimal visual coverage
+
+## 📝 Notes
+
+- The system requires both wrist and external cameras for optimal performance
+- Model performance depends on lighting conditions and camera positioning
+- Regular calibration of the robot and cameras is recommended
+- Check the video output directory for recorded demonstrations
diff --git a/experiments/7_franka/deoxys_control b/experiments/7_franka/deoxys_control
new file mode 160000
index 0000000..97396fd
--- /dev/null
+++ b/experiments/7_franka/deoxys_control
@@ -0,0 +1 @@
+Subproject commit 97396fd91324e9e961f061544e80a208889526ff
diff --git a/experiments/7_franka/eval_franka.py b/experiments/7_franka/eval_franka.py
new file mode 100644
index 0000000..2ba5bba
--- /dev/null
+++ b/experiments/7_franka/eval_franka.py
@@ -0,0 +1,196 @@
+import os
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+import collections
+import copy
+import dataclasses
+import os.path as osp
+import time
+from datetime import datetime
+from pathlib import Path
+
+import cv2
+import deoxys.utils.transform_utils as dft
+import imageio
+import numpy as np
+import torch
+import tqdm
+import tyro
+from deoxys import config_root
+from deoxys.experimental.motion_utils import reset_joints_to
+from deoxys.franka_interface import FrankaInterface
+from deoxys.utils import YamlConfig
+from PIL import Image
+from realsense_camera import MultiCamera
+from transformers import AutoModel, AutoProcessor
+
+# Add your camera serial numbers here
+EGO_CAMERA = "213522070137"
+THIRD_CAMERA = "243222074139"
+
+reset_joint_positions = [
+    0.0760389047913384,
+    -1.0362613022620384,
+    -0.054254247684777324,
+    -2.383951857286591,
+    -0.004505598470154735,
+    1.3820559157131187,
+    0.784935455988679,
+]
+
+
+def save_rollout_video(rollout_images, save_dir):
+    """Saves an MP4 replay of an episode."""
+    date_time = time.strftime("%Y_%m_%d-%H_%M_%S")
+    mp4_path = Path(save_dir) / f"{date_time}.mp4"
+    video_writer = imageio.get_writer(mp4_path, fps=5)
+    for img in rollout_images:
+        video_writer.append_data(img)
+    video_writer.close()
+    print(f"Saved rollout MP4 at path {mp4_path}")
+
+
+@dataclasses.dataclass
+class Args:
+    #################################################################################################################
+    # Model parameters
+    #################################################################################################################
+    resize_size: int = 224
+    replan_steps: int = 5
+    model_path: str = ""
+    repo_id: str = ""
+    task: str = ""
+
+    #################################################################################################################
+    # Utils
+    #################################################################################################################
+    video_out_path: Path = Path("experiments/7_franka/videos")  # Path to save videos
+    max_timesteps: int = 300  # Number of timesteps to run
+
+
+def convert_gripper_action(action):
+    action[-1] = 1 - action[-1]
+    if action[-1] < 0.5:
+        action[-1] = -1
+
+    return action
+
+
+def get_robot_interface():
+    robot_interface = FrankaInterface(osp.join(config_root, "charmander.yml"))
+    controller_cfg = YamlConfig(osp.join(config_root, "osc-pose-controller.yml")).as_easydict()
+    controller_type = "OSC_POSE"
+
+    return robot_interface, controller_cfg, controller_type
+
+
+def main(args: Args):
+    multi_camera = MultiCamera()
+    robot_interface, controller_cfg, controller_type = get_robot_interface()
+
+    model = (
+        AutoModel.from_pretrained(args.model_path, dtype=torch.bfloat16, trust_remote_code=True).eval().cuda()
+    )
+
+    processor = AutoProcessor.from_pretrained(args.model_path, trust_remote_code=True)
+
+    while True:
+        action_plan = collections.deque()
+        input("Press Enter to start episode ...")
+        reset_joints_to(robot_interface, reset_joint_positions)
+
+        replay_images = []
+        bar = tqdm.tqdm(
+            range(args.max_timesteps),
+            position=0,
+            leave=True,
+            ncols=80,
+            desc="Rollout steps",
+        )
+
+        for _ in bar:
+            try:
+                images = multi_camera.get_frame()
+                last_state = robot_interface._state_buffer[-1]
+                last_gripper_state = robot_interface._gripper_state_buffer[-1]
+                frame, _ = images[THIRD_CAMERA]
+                ego_frame, _ = images[EGO_CAMERA]
+
+                if not action_plan:
+                    # Convert the BGR camera frames to RGB once, for both the replay video and the model input.
+                    frame_rgb = cv2.cvtColor(copy.deepcopy(frame), cv2.COLOR_BGR2RGB)
+                    ego_frame_rgb = cv2.cvtColor(copy.deepcopy(ego_frame), cv2.COLOR_BGR2RGB)
+                    replay_images.append(frame_rgb.copy())
+
+                    eef_pose = np.asarray(last_state.O_T_EE, dtype=np.float32).reshape(4, 4).T
+                    eef_state = np.concatenate(
+                        (
+                            eef_pose[:3, -1],
+                            dft.mat2euler(
+                                eef_pose[:3, :-1],
+                            ),
+                        ),
+                        axis=-1,
+                    )
+                    gripper_state = np.array([last_gripper_state.width])
+                    state_data = np.concatenate([eef_state.flatten(), np.array([0]), gripper_state])
+
+                    frame_resized = cv2.resize(frame_rgb, (args.resize_size, args.resize_size))
+                    ego_frame_resized = cv2.resize(ego_frame_rgb, (args.resize_size, args.resize_size))
+
+                    ego_frame = Image.fromarray(ego_frame_resized)
+                    frame = Image.fromarray(frame_resized)
+
+                    # NOTE: Change the keys to match your model
+                    batch = {
+                        "observation.images.image": [frame],
+                        "observation.images.wrist_image": [ego_frame],
+                        "observation.state": [state_data],
+                        "task": [args.task],
+                        "repo_id": [args.repo_id],
+                    }
+                    ov_out = processor.select_action(
+                        model,
+                        batch,
+                    )
+                    action_chunk = ov_out.action[0].numpy()
+                    assert len(action_chunk) >= args.replan_steps, (
+                        f"We want to replan every {args.replan_steps} steps, but policy only predicts {len(action_chunk)} steps."
+                    )
+                    action_plan.extend(action_chunk[: args.replan_steps])
+
+                pred_action_chunk = action_plan.popleft()
+                action = pred_action_chunk
+                rotation_matrix = dft.euler2mat(action[3:6])
+                quat = dft.mat2quat(rotation_matrix)
+                axis_angle = dft.quat2axisangle(quat)
+                action[3:6] = axis_angle
+                action = convert_gripper_action(action)
+
+                robot_interface.control(
+                    controller_type=controller_type, action=action, controller_cfg=controller_cfg
+                )
+
+            except KeyboardInterrupt:
+                break
+
+        # saving video
+        video_save_path = args.video_out_path / args.task.replace(" ", "_")
+        video_save_path.mkdir(parents=True, exist_ok=True)
+        curr_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
+        save_path = video_save_path / f"{curr_time}.mp4"
+        video = np.stack(replay_images)
+        imageio.mimsave(save_path, video, fps=20)
+
+        if input("Do one more eval (default y)? [y/n]").lower() == "n":
+            break
+
+
+if __name__ == "__main__":
+    args = tyro.cli(Args)
+    main(args)
diff --git a/experiments/7_franka/realsense_camera.py b/experiments/7_franka/realsense_camera.py
new file mode 100755
index 0000000..bd29d95
--- /dev/null
+++ b/experiments/7_franka/realsense_camera.py
@@ -0,0 +1,185 @@
+import numpy as np
+import pyrealsense2 as rs
+
+# the camera intrinsics from the camera calibration
+serial_number_to_cam_intr = {
+    "243222074139": {
+        "fx": 604.987548828125,
+        "fy": 604.9332885742188,
+        "px": 325.9312438964844,
+        "py": 243.00851440429688,
+    },  # eye-to-hand
+    "213522070137": {
+        "fx": 596.382688,
+        "fy": 596.701788,
+        "px": 333.837001,
+        "py": 254.401211,
+    },  # eye-in-hand
+}
+
+
+class Camera:
+    def __init__(
+        self,
+        serial_number: str | None = "213522070137",
+        camera_width: int = 640,
+        camera_height: int = 480,
+        camera_fps: int = 30,
+    ) -> None:
+        self.camera_width = camera_width
+        self.camera_height = camera_height
+        self.camera_fps = camera_fps
+        self.pipeline = rs.pipeline()
+        # self.pipeline.wait_for_frames(9999)
+        self.config = rs.config()
+
+        if serial_number is not None:
+            self.config.enable_device(serial_number)
+        self.config.enable_stream(
+            rs.stream.depth,
+            self.camera_width,
+            self.camera_height,
+            rs.format.z16,
+            self.camera_fps,
+        )
+        self.config.enable_stream(
+            rs.stream.color,
+            self.camera_width,
+            self.camera_height,
+            rs.format.bgr8,
+            self.camera_fps,
+        )
+
+        self.profile = self.pipeline.start(self.config)
+
+        depth_sensor = self.profile.get_device().first_depth_sensor()
+        self.depth_scale: float = depth_sensor.get_depth_scale()
+        print("Depth Scale is: ", self.depth_scale)
+
+        align_to = rs.stream.color
+        self.align = rs.align(align_to)
+
+    def close(self):
+        self.pipeline.stop()
+
+    def get_frame(self, filter=True) -> tuple[np.ndarray, np.ndarray]:
+        """
+        Args:
+            filter (bool, optional): Whether to apply filters to depth frames. Defaults to True.
+
+        Returns:
+            color_image (np.ndarray): shape (H, W, 3), color image (BGR).
+            depth_image (np.ndarray): shape (H, W), depth image in meters.
+        """
+
+        depth_to_disparity = rs.disparity_transform(True)
+
+        spatial = rs.spatial_filter()
+        spatial.set_option(rs.option.filter_magnitude, 2)
+        spatial.set_option(rs.option.filter_smooth_alpha, 0.5)
+        spatial.set_option(rs.option.filter_smooth_delta, 20)
+
+        temporal = rs.temporal_filter()
+        temporal.set_option(rs.option.filter_smooth_alpha, 0.4)
+        temporal.set_option(rs.option.filter_smooth_delta, 20)
+
+        disparity_to_depth = rs.disparity_transform(False)
+
+        hole_filling = rs.hole_filling_filter()
+
+        frames = self.pipeline.wait_for_frames()
+        aligned_frames = self.align.process(frames)
+        depth_frame = aligned_frames.get_depth_frame()
+        color_frame = aligned_frames.get_color_frame()
+
+        # Apply the filters to depth frames
+        if filter:
+            filtered_depth = depth_to_disparity.process(depth_frame)
+            filtered_depth = spatial.process(filtered_depth)
+            filtered_depth = temporal.process(filtered_depth)
+            filtered_depth = disparity_to_depth.process(filtered_depth)
+            filtered_depth = hole_filling.process(filtered_depth)
+
+            depth_image = np.asanyarray(filtered_depth.get_data())
+            color_image = np.asanyarray(color_frame.get_data())
+        else:
+            depth_image = np.asanyarray(depth_frame.get_data())
+            color_image = np.asanyarray(color_frame.get_data())
+
+        return color_image, depth_image * self.depth_scale
+
+    def get_camera_intrinsics(self, use_raw=False, serial_number: str | None = "213522070137"):
+        """
+        Args:
+            use_raw (bool, optional): Whether to use the camera intrinsics from the raw stream. Defaults to False.
+
+        Returns:
+            {
+                "fx": Focal length x,
+                "fy": Focal length y,
+                "px": Principal point x,
+                "py": Principal point y
+            }
+        """
+        if use_raw:
+            profile = self.profile.get_stream(rs.stream.color)
+            intr = profile.as_video_stream_profile().get_intrinsics()
+            return {"px": intr.ppx, "py": intr.ppy, "fx": intr.fx, "fy": intr.fy}
+        return serial_number_to_cam_intr[serial_number]
+
+
+class MultiCamera:
+    def __init__(
+        self,
+        serial_numbers: list[str] | None = None,
+        camera_width: int = 640,
+        camera_height: int = 480,
+        camera_fps: int = 30,
+    ) -> None:
+        all_serial_numbers = []
+        for d in sorted(rs.context().devices, key=lambda x: x.get_info(rs.camera_info.serial_number)):
+            print(
+                "Found device: ",
+                d.get_info(rs.camera_info.name),
+                " ",
+                d.get_info(rs.camera_info.serial_number),
+            )
+            if d.get_info(rs.camera_info.name).lower() != "platform camera":
+                all_serial_numbers.append(d.get_info(rs.camera_info.serial_number))
+
+        if serial_numbers is None:
+            serial_numbers = all_serial_numbers
+        else:
+            serial_numbers = [
+                serial_number for serial_number in serial_numbers if serial_number in all_serial_numbers
+            ]
+
+        print("Using cameras with serial numbers: ", serial_numbers)
+
+        self.cameras = {
+            serial_number: Camera(serial_number, camera_width, camera_height, camera_fps)
+            for serial_number in serial_numbers
+        }
+
+        # Warm up the streams so the temporal depth filters converge before first real use.
+        for _ in range(20):
+            self.get_frame()
+
+    def close(self):
+        for camera in self.cameras.values():
+            camera.close()
+
+    def get_frame(
+        self, serial_numbers: list[str] | None = None, filter=True
+    ) -> dict[str, tuple[np.ndarray, np.ndarray]]:
+        if serial_numbers is None:
+            serial_numbers = list(self.cameras.keys())
+        return {serial_number: self.cameras[serial_number].get_frame() for serial_number in serial_numbers}
+
+    def get_camera_intrinsics(
+        self, serial_numbers: list[str] | None = None, use_raw=False
+    ) -> dict[str, dict[str, float]]:
+        if serial_numbers is None:
+            serial_numbers = self.cameras.keys()
+        return {
+            serial_number: self.cameras[serial_number].get_camera_intrinsics(use_raw, serial_number)
+            for serial_number in serial_numbers
+        }
diff --git a/experiments/7_franka/requirements.txt b/experiments/7_franka/requirements.txt
new file mode 100644
index 0000000..c9ca9a2
--- /dev/null
+++ b/experiments/7_franka/requirements.txt
@@ -0,0 +1,8 @@
+torch
+transformers>=4.56.0
+opencv-python
+imageio
+tyro
+pillow
+pyrealsense2
+lerobot
diff --git a/scripts/eval_policy.py b/scripts/eval_policy.py
index 01192d5..e494895 100644
--- a/scripts/eval_policy.py
+++ b/scripts/eval_policy.py
@@ -22,12 +22,16 @@ def eval_policy():
     # set the observation (image, state, etc.)
-    image0 = "demo_data/example.png"
-    image1 = Image.open("demo_data/example.png")
+    import numpy as np
 
-    model = AutoModel.from_pretrained(args.model_path, dtype=torch.bfloat16).eval().cuda()
+    image0 = (np.random.rand(224, 224, 3) * 255).astype(np.uint8)
+    image1 = Image.fromarray(image0.copy())
 
-    processor = AutoProcessor.from_pretrained(args.model_path)
+    model = (
+        AutoModel.from_pretrained(args.model_path, dtype=torch.bfloat16, trust_remote_code=True).eval().cuda()
+    )
+
+    processor = AutoProcessor.from_pretrained(args.model_path, trust_remote_code=True)
 
     batch = {
         "observation.images.image": [image0],