diff --git a/realworld/go2_vln_client.py b/realworld/go2_vln_client.py index 67d3840..36faa4e 100644 --- a/realworld/go2_vln_client.py +++ b/realworld/go2_vln_client.py @@ -12,9 +12,10 @@ from nav_msgs.msg import Odometry, Path from geometry_msgs.msg import PoseStamped from geometry_msgs.msg import Twist -from std_msgs.msg import Bool +from std_msgs.msg import Bool, String from cv_bridge import CvBridge from rclpy.node import Node +from rclpy.qos import QoSProfile, QoSDurabilityPolicy # unitree related from unitree_go.msg import SportModeState @@ -23,6 +24,7 @@ # user-specific from pid_controller import * +from utils import ReadWriteLock # global variable policy_init = True pid = PID_controller(Kp_trans=3.0, Kd_trans=0.5, Kp_yaw=3.0, Kd_yaw=0.5, max_v=1.0, max_w=1.2) @@ -31,16 +33,18 @@ rgb_rw_lock = ReadWriteLock() depth_rw_lock = ReadWriteLock() odom_rw_lock = ReadWriteLock() - -def eval_vln(image, depth, camera_pose, instruction, url='http://localhost:5801/eval_vln'): - global policy_init +instruction_rw_lock = ReadWriteLock() + +def eval_vln(image, depth, camera_pose, instruction, policy_init, url='http://localhost:5801/eval_vln'): image = PIL_Image.fromarray(image) # image = image.resize((384, 384)) image_bytes = io.BytesIO() image.save(image_bytes, format='jpeg') image_bytes.seek(0) - data = {"reset":policy_init} + data = {"reset":policy_init, + "instruction": instruction,} + json_data = json.dumps(data) policy_init = False @@ -50,7 +54,11 @@ def eval_vln(image, depth, camera_pose, instruction, url='http://localhost:5801/ print(f"total time(delay + policy): {time.time() - start}") print(response.text) - action = json.loads(response.text)['action'] + if(response.text): + action = json.loads(response.text)['action'] + else: + action = [0] + return action def control_thread(): @@ -82,11 +90,17 @@ def planning_thread(): odom_rw_lock.acquire_read() request_cnt = manager.request_cnt odom_rw_lock.release_read() + + instruction_rw_lock.acquire_read() + instruction = manager.instruction + policy_init = manager.policy_init + instruction_rw_lock.release_read() + if rgb_image is None: time.sleep(0.1) continue - actions = eval_vln(rgb_image, None, None, None) + actions = eval_vln(rgb_image, None, None, instruction=instruction, policy_init=policy_init) print(f"111") odom_rw_lock.acquire_write() manager.should_plan = False @@ -100,11 +114,16 @@ class Go2VlnManager(Node): def __init__(self): super().__init__('go2_manager') + # QoS: keep last message and deliver it to late subscribers + qos_profile = QoSProfile(depth=1) + qos_profile.durability = QoSDurabilityPolicy.TRANSIENT_LOCAL + # subsucriber self.rgb_sub = self.create_subscription(Image, "/camera/camera/color/image_raw", self.rgb_callback, 1) # self.depth_sub = self.create_subscription(Image, "/camera/camera/aligned_depth_to_color/image_raw", # self.depth_callback, 1) self.odom_sub = self.create_subscription(SportModeState, "/sportmodestate", self.odom_callback, 10) + self.instruction_sub = self.create_subscription(String, "/instruction", self.instruction_callback, qos_profile) # publisher self.control_pub = self.create_publisher(Request, '/api/sport/request', 5) @@ -116,12 +135,16 @@ def __init__(self): self.homo_goal = None self.homo_odom = None self.vel = None + + self.instruction = None + self.policy_init = None self.request_cnt = 0 self.odom_cnt = 0 self.should_plan = False self.last_plan_time = 0.0 + def rgb_callback(self, msg): rgb_rw_lock.acquire_write() raw_image = self.cv_bridge.imgmsg_to_cv2(msg, 'bgr8')[:, :, :] @@ -160,6 +183,14 @@ def odom_callback(self, msg): self.homo_goal = self.homo_odom.copy() odom_rw_lock.release_write() + def instruction_callback(self, msg): + instruction_rw_lock.acquire_write() + self.instruction = msg.data + self.policy_init = True + self.should_plan = True + print("Received instruction: ", self.instruction) + instruction_rw_lock.release_write() + def trigger_replan(self): self.should_plan = True diff --git a/realworld/realworld.md b/realworld/realworld.md index c58926a..efc1b6c 100644 --- a/realworld/realworld.md +++ b/realworld/realworld.md @@ -8,6 +8,16 @@ We provide the server code of StreamVLN and the client code of unitree Go2 to de 2. Install [realsense-ros](https://github.com/IntelRealSense/realsense-ros) package. +3. Install flask +```bash +pip install flask +``` + +4. Install FlashAttention2 +```bash +pip install flash-attn --no-build-isolation +``` + ## Run the Real-world Experiment 1. Run the realsense-ros on robot @@ -29,6 +39,11 @@ We provide the server code of StreamVLN and the client code of unitree Go2 to de python3 go2_vln_client.py ``` +4. Publish your custom natural language instruction to robot using "ros2 topic pub" command, such as "Turn around and find the door to the room, walk forward and stop in front of it." + ```bash + ros2 topic pub /instruction std_msgs/msg/String "data: 'Turn around and find the door to the room, walk forward and stop in front of it.'" -r 20 --times 3 + ``` + If everything goes well, the robot should start moving and the server should print the action sequence. diff --git a/streamvln/http_realworld_server.py b/streamvln/http_realworld_server.py index bfb4703..8b5af6d 100644 --- a/streamvln/http_realworld_server.py +++ b/streamvln/http_realworld_server.py @@ -71,7 +71,10 @@ def eval_vln(): image = np.asarray(image)[...,::-1] camera_pose = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]) - instruction = "Walk forward and immediately stop when you exit the room." + + instruction = data.get("instruction", None) # <--- instruction string + if(instruction is None): + instruction = "Walk forward and immediately stop when you exit the room." policy_init = data['reset'] @@ -94,10 +97,10 @@ def eval_vln(): for i in range(4): t1 = time.time() depth = np.zeros((image.shape[0], image.shape[1], 1)) - return_action, generate_time, return_llm_output = evaluator.eval_action(0, + return_action, generate_time, return_llm_output = evaluator.step(0, image, - depth, - camera_pose, + # depth, + # camera_pose, instruction, run_model=(evaluator.step_id % 4 == 0)) llm_output = return_llm_output if return_llm_output is not None else llm_output