45 changes: 38 additions & 7 deletions realworld/go2_vln_client.py
@@ -12,9 +12,10 @@
from nav_msgs.msg import Odometry, Path
from geometry_msgs.msg import PoseStamped
from geometry_msgs.msg import Twist
from std_msgs.msg import Bool
from std_msgs.msg import Bool, String
from cv_bridge import CvBridge
from rclpy.node import Node
from rclpy.qos import QoSProfile, QoSDurabilityPolicy

# unitree related
from unitree_go.msg import SportModeState
@@ -23,6 +24,7 @@

# user-specific
from pid_controller import *
from utils import ReadWriteLock
# global variable
policy_init = True
pid = PID_controller(Kp_trans=3.0, Kd_trans=0.5, Kp_yaw=3.0, Kd_yaw=0.5, max_v=1.0, max_w=1.2)
@@ -31,16 +33,18 @@
rgb_rw_lock = ReadWriteLock()
depth_rw_lock = ReadWriteLock()
odom_rw_lock = ReadWriteLock()
def eval_vln(image, depth, camera_pose, instruction, url='http://localhost:5801/eval_vln'):
global policy_init
instruction_rw_lock = ReadWriteLock()

def eval_vln(image, depth, camera_pose, instruction, policy_init, url='http://localhost:5801/eval_vln'):
image = PIL_Image.fromarray(image)
# image = image.resize((384, 384))
image_bytes = io.BytesIO()
image.save(image_bytes, format='jpeg')
image_bytes.seek(0)

data = {"reset":policy_init}
data = {"reset":policy_init,
"instruction": instruction,}

json_data = json.dumps(data)
policy_init = False

@@ -50,7 +54,11 @@ def eval_vln(image, depth, camera_pose, instruction, url='http://localhost:5801/
print(f"total time(delay + policy): {time.time() - start}")
print(response.text)

action = json.loads(response.text)['action']
if(response.text):
action = json.loads(response.text)['action']
else:
action = [0]

return action

def control_thread():
@@ -82,11 +90,17 @@ def planning_thread():
odom_rw_lock.acquire_read()
request_cnt = manager.request_cnt
odom_rw_lock.release_read()

instruction_rw_lock.acquire_read()
instruction = manager.instruction
policy_init = manager.policy_init
instruction_rw_lock.release_read()

if rgb_image is None:
time.sleep(0.1)
continue

actions = eval_vln(rgb_image, None, None, None)
actions = eval_vln(rgb_image, None, None, instruction=instruction, policy_init=policy_init)
print(f"111")
odom_rw_lock.acquire_write()
manager.should_plan = False
@@ -100,11 +114,16 @@ class Go2VlnManager(Node):
def __init__(self):
super().__init__('go2_manager')

# QoS: keep last message and deliver it to late subscribers
qos_profile = QoSProfile(depth=1)
qos_profile.durability = QoSDurabilityPolicy.TRANSIENT_LOCAL

# subscriber
self.rgb_sub = self.create_subscription(Image, "/camera/camera/color/image_raw", self.rgb_callback, 1)
# self.depth_sub = self.create_subscription(Image, "/camera/camera/aligned_depth_to_color/image_raw",
# self.depth_callback, 1)
self.odom_sub = self.create_subscription(SportModeState, "/sportmodestate", self.odom_callback, 10)
self.instruction_sub = self.create_subscription(String, "/instruction", self.instruction_callback, qos_profile)

# publisher
self.control_pub = self.create_publisher(Request, '/api/sport/request', 5)
@@ -116,12 +135,16 @@ def __init__(self):
self.homo_goal = None
self.homo_odom = None
self.vel = None

self.instruction = None
self.policy_init = None

self.request_cnt = 0
self.odom_cnt = 0

self.should_plan = False
self.last_plan_time = 0.0

def rgb_callback(self, msg):
rgb_rw_lock.acquire_write()
raw_image = self.cv_bridge.imgmsg_to_cv2(msg, 'bgr8')[:, :, :]
@@ -160,6 +183,14 @@ def odom_callback(self, msg):
self.homo_goal = self.homo_odom.copy()
odom_rw_lock.release_write()

def instruction_callback(self, msg):
instruction_rw_lock.acquire_write()
self.instruction = msg.data
self.policy_init = True
self.should_plan = True
print("Received instruction: ", self.instruction)
instruction_rw_lock.release_write()

def trigger_replan(self):
self.should_plan = True

15 changes: 15 additions & 0 deletions realworld/realworld.md
@@ -8,6 +8,16 @@ We provide the server code of StreamVLN and the client code of unitree Go2 to de

2. Install [realsense-ros](https://github.com/IntelRealSense/realsense-ros) package.

3. Install Flask
```bash
pip install flask
```

4. Install FlashAttention2
```bash
pip install flash-attn --no-build-isolation
```

## Run the Real-world Experiment

1. Run realsense-ros on the robot
@@ -29,6 +39,11 @@ We provide the server code of StreamVLN and the client code of unitree Go2 to de
python3 go2_vln_client.py
```

4. Publish your custom natural-language instruction to the robot with the `ros2 topic pub` command, for example "Turn around and find the door to the room, walk forward and stop in front of it." (a programmatic alternative is sketched after the command below).
```bash
ros2 topic pub /instruction std_msgs/msg/String "data: 'Turn around and find the door to the room, walk forward and stop in front of it.'" -r 20 --times 3
```
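If you prefer to send the instruction from Python instead of the CLI, the sketch below publishes the same message programmatically. It assumes the client above is running and subscribed to `/instruction` (`std_msgs/String`) with TRANSIENT_LOCAL durability, as in `go2_vln_client.py`; the node name and instruction text are only placeholders.
```python
# Minimal sketch: publish an instruction to the Go2 VLN client from Python.
# Assumes the client subscribes to /instruction (std_msgs/String) with
# TRANSIENT_LOCAL durability, as set up in go2_vln_client.py above.
import rclpy
from rclpy.node import Node
from rclpy.qos import QoSProfile, QoSDurabilityPolicy
from std_msgs.msg import String

def main():
    rclpy.init()
    node = Node('instruction_publisher')  # node name is illustrative

    # Match the subscriber's QoS so the last message is also delivered to a
    # client that finishes starting up after this publisher.
    qos = QoSProfile(depth=1)
    qos.durability = QoSDurabilityPolicy.TRANSIENT_LOCAL
    pub = node.create_publisher(String, '/instruction', qos)

    msg = String()
    msg.data = 'Turn around and find the door to the room, walk forward and stop in front of it.'
    pub.publish(msg)

    # Spin briefly so the message is actually sent before shutdown.
    rclpy.spin_once(node, timeout_sec=1.0)
    node.destroy_node()
    rclpy.shutdown()

if __name__ == '__main__':
    main()
```
Setting TRANSIENT_LOCAL on the publisher matches the durability that the `/instruction` subscriber in `go2_vln_client.py` requests.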

If everything goes well, the robot should start moving and the server should print the action sequence.


11 changes: 7 additions & 4 deletions streamvln/http_realworld_server.py
@@ -71,7 +71,10 @@ def eval_vln():
image = np.asarray(image)[...,::-1]

camera_pose = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
instruction = "Walk forward and immediately stop when you exit the room."

instruction = data.get("instruction", None) # <--- instruction string
if(instruction is None):
instruction = "Walk forward and immediately stop when you exit the room."


policy_init = data['reset']
@@ -94,10 +97,10 @@ def eval_vln():
for i in range(4):
t1 = time.time()
depth = np.zeros((image.shape[0], image.shape[1], 1))
return_action, generate_time, return_llm_output = evaluator.eval_action(0,
return_action, generate_time, return_llm_output = evaluator.step(0,
image,
depth,
camera_pose,
# depth,
# camera_pose,
instruction,
run_model=(evaluator.step_id % 4 == 0))
llm_output = return_llm_output if return_llm_output is not None else llm_output