Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ rosidl_generate_interfaces(${PROJECT_NAME}
"action/TranscribeSpeech.action"
"msg/Transcription.msg"
"srv/TranscribeAudio.srv"
"srv/Wakeword.srv"
DEPENDENCIES builtin_interfaces # Add packages that above messages depend on
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
string[] keywords # keywords to detect; each should match a model name
float32 timeout # seconds to listen before giving up; <= 0 disables the timeout
float32 threshold # score a keyword must exceed to count as detected
---
bool success # true if any keyword was detected
string keyword # detected keyword; empty when success is false
18 changes: 18 additions & 0 deletions common/speech/lasr_wakewords/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
cmake_minimum_required(VERSION 3.5)
project(lasr_wakewords)

# Nothing is compiled here: the package only installs a Python node script
# and its model files through ament_cmake.
find_package(ament_cmake REQUIRED)

# Node script, installed as an executable under lib/<package>.
install(PROGRAMS nodes/wakeword_service DESTINATION lib/${PROJECT_NAME})

# Bundled wakeword models, installed under share/<package>.
install(DIRECTORY models DESTINATION share/${PROJECT_NAME})

ament_package()
Binary file not shown.
Binary file not shown.
Binary file added common/speech/lasr_wakewords/models/no.onnx
Binary file not shown.
Binary file added common/speech/lasr_wakewords/models/no.tflite
Binary file not shown.
Binary file added common/speech/lasr_wakewords/models/yes.onnx
Binary file not shown.
Binary file added common/speech/lasr_wakewords/models/yes.tflite
Binary file not shown.
170 changes: 170 additions & 0 deletions common/speech/lasr_wakewords/nodes/wakeword_service
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#!/usr/bin/env python3
import os
import time
import threading

import numpy as np
import sounddevice as sd
import openwakeword
from openwakeword.model import Model

import rclpy
from rclpy.node import Node
from ament_index_python.packages import get_package_share_directory

from lasr_speech_recognition_interfaces.srv import Wakeword


class WakewordDetector:
    """Wraps the wakeword detection logic using openwakeword.

    Audio settings are read from parameters declared on the owning node, and
    ``<keyword>.tflite`` models are loaded from the ``models`` directory of
    the ``lasr_wakewords`` package share.
    """

    def __init__(self, node: "Node"):
        """Declare/read the audio parameters and resolve the model directory.

        Args:
            node: Node used for parameter declaration and for logging.
        """
        self._node = node
        self._logger = node.get_logger()

        # Declare parameters with defaults (matching ROS1)
        # NOTE(review): a default device_index of 9 looks machine-specific;
        # confirm it is appropriate for the target robot.
        self._node.declare_parameter("device_index", 9)
        self._node.declare_parameter("sample_rate", 16000)
        self._node.declare_parameter("frame_samples", 1280)

        self._device_index = (
            self._node.get_parameter("device_index").get_parameter_value().integer_value
        )
        self._sample_rate = (
            self._node.get_parameter("sample_rate").get_parameter_value().integer_value
        )
        self._frame_samples = (
            self._node.get_parameter("frame_samples")
            .get_parameter_value()
            .integer_value
        )

        # Model path: use the installed package share directory in ROS2
        pkg_share = get_package_share_directory("lasr_wakewords")
        self._model_path = os.path.join(pkg_share, "models")

        self._logger.info(
            f"WakewordDetector init: device_index={self._device_index}, "
            f"sample_rate={self._sample_rate}, frame_samples={self._frame_samples}"
        )
        self._logger.info(f"Model path: {self._model_path}")

    @staticmethod
    def _to_int16_pcm(samples):
        """Convert float samples in [-1, 1] to int16 PCM, saturating at full scale.

        Scaling by 32768 maps +1.0 to 32768, which does not fit in int16, so
        the scaled values are clipped to the int16 range before the cast.
        """
        scaled = np.asarray(samples, dtype=np.float32) * 32768.0
        return np.clip(scaled, -32768.0, 32767.0).astype(np.int16)

    def detect(self, keywords, timeout: float, threshold: float):
        """Run wakeword detection for one request.

        Opens the configured input device and feeds every audio frame to the
        models until a keyword scores above ``threshold``, the timeout
        elapses, or rclpy is shut down.

        Args:
            keywords: Names of the wakewords to listen for; each is resolved
                to ``<name>.tflite`` inside the model directory.
            timeout: Maximum listening time in seconds. Values <= 0 disable
                the timeout.
            threshold: Score a keyword must exceed to count as detected.

        Returns:
            ``(success, keyword)``: ``(True, name)`` when a keyword was
            detected, otherwise ``(False, None)`` (timeout, shutdown, model
            loading failure or audio stream error).
        """
        wakewords = list(keywords)
        max_duration = timeout
        detected = threading.Event()
        detected_keyword = ""

        self._logger.info(
            f"Listening for wakewords: {wakewords}, "
            f"threshold={threshold}, timeout={max_duration}s"
        )

        model_paths = [
            os.path.join(self._model_path, f"{wakeword}.tflite")
            for wakeword in wakewords
        ]

        try:
            model = Model(model_paths)
        except Exception as e:
            self._logger.error(f"Failed to load model: {e}")
            return False, None

        def audio_callback(indata, frames, time_info, status):
            """Score one audio frame and record the first keyword above threshold."""
            nonlocal detected_keyword
            if status:
                self._logger.warn(f"Audio stream status: {status}")
            # Frames may still arrive between the detection and the stream
            # being closed; ignore them so the first detection is reported.
            if detected.is_set():
                return
            # Convert float32 [-1,1] to int16 PCM like ROS1 code
            pcm = self._to_int16_pcm(indata[:, 0])
            result = model.predict(pcm)
            # Per-frame scores are only useful for debugging; logging them at
            # info level floods the log once per audio block.
            self._logger.debug(str(result))
            if not result:
                return
            wakeword = max(result, key=lambda k: result[k])
            score = result[wakeword]
            if score > threshold:
                self._logger.info(
                    f"Wakeword '{wakeword}' detected (score={score:.3f})"
                )
                detected_keyword = wakeword
                detected.set()

        try:
            with sd.InputStream(
                device=self._device_index,
                channels=1,
                samplerate=self._sample_rate,
                blocksize=self._frame_samples,
                dtype="float32",
                callback=audio_callback,
            ):
                start_time = time.monotonic()

                while rclpy.ok() and not detected.is_set():
                    elapsed = time.monotonic() - start_time
                    if max_duration > 0 and elapsed >= max_duration:
                        self._logger.info(
                            f"Timeout reached after {elapsed:.1f} seconds"
                        )
                        break
                    # Short waits keep the loop responsive to shutdown and
                    # to the timeout check above.
                    detected.wait(timeout=0.1)
        except Exception as e:
            self._logger.error(f"Error opening InputStream: {e}")
            return False, None

        # If we got here, success is True iff keyword set
        if detected_keyword:
            return True, detected_keyword
        return False, None


class WakewordService(Node):
    """Node that exposes wakeword detection as the ``/lasr_wakewords/detect`` service."""

    def __init__(self):
        """Create the node, its detector, and the detection service."""
        super().__init__("wakeword_service")

        # The detector declares and reads its parameters on this node.
        self.detector = WakewordDetector(self)

        # Register the service; requests are handled by handle_detect.
        service_name = "/lasr_wakewords/detect"
        self.srv = self.create_service(Wakeword, service_name, self.handle_detect)
        self.get_logger().info("Wakeword service /lasr_wakewords/detect ready")

    def handle_detect(self, request: Wakeword.Request, response: Wakeword.Response):
        """Run one detection for ``request`` and fill in ``response``.

        The detector is called synchronously, so this callback returns only
        once ``detect`` has returned.
        """
        requested = list(request.keywords)
        self.get_logger().info(
            f"Wakeword request: keywords={requested}, "
            f"timeout={request.timeout}, threshold={request.threshold}"
        )

        found, name = self.detector.detect(
            keywords=request.keywords,
            timeout=float(request.timeout),
            threshold=float(request.threshold),
        )

        # The response carries a plain bool and a string, never None.
        response.success = True if found else False
        response.keyword = name if name else ""
        return response


def main(args=None):
    """Download the openwakeword models, then spin the wakeword service node.

    Args:
        args: Command-line arguments forwarded to ``rclpy.init``.
    """
    # Download the models
    openwakeword.utils.download_models()

    rclpy.init(args=args)
    node = None
    try:
        node = WakewordService()
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        # Always clean up, including when node construction or spin raises.
        if node is not None:
            node.destroy_node()
        # Only shut down while the context is still valid, so an earlier
        # shutdown (e.g. on Ctrl-C) does not lead to a second shutdown call.
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == "__main__":
    main()
62 changes: 62 additions & 0 deletions common/speech/lasr_wakewords/package.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?xml version="1.0"?>
<package format="3">
  <name>lasr_wakewords</name>
  <version>0.0.0</version>
  <description>The lasr_wakewords package</description>

  <!-- One maintainer tag required, multiple allowed, one person per tag -->
  <maintainer email="[email protected]">Siyao Li</maintainer>

  <!-- One license tag required, multiple allowed, one license per tag -->
  <license>MIT</license>

  <!-- ROS2 build system -->
  <buildtool_depend>ament_cmake</buildtool_depend>

  <!-- Runtime dependencies of the wakeword_service node -->
  <exec_depend>rclpy</exec_depend>
  <!-- Used by the node to locate the installed models directory -->
  <exec_depend>ament_index_python</exec_depend>
  <exec_depend>lasr_speech_recognition_interfaces</exec_depend>

  <export>
    <!-- Declare the build type so build tools treat this as an ament_cmake package -->
    <build_type>ament_cmake</build_type>
  </export>
</package>

2 changes: 2 additions & 0 deletions common/speech/lasr_wakewords/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
openwakeword
sounddevice
24 changes: 24 additions & 0 deletions common/speech/lasr_wakewords/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
certifi==2025.6.15 # via requests
cffi==1.17.1 # via sounddevice
charset-normalizer==3.4.2 # via requests
coloredlogs==15.0.1 # via onnxruntime
flatbuffers==25.2.10 # via onnxruntime
humanfriendly==10.0 # via coloredlogs
idna==3.10 # via requests
joblib==1.4.2 # via scikit-learn
mpmath==1.3.0 # via sympy
numpy==1.24.4 # via onnxruntime, scikit-learn, scipy, tflite-runtime
onnxruntime==1.19.2 # via openwakeword
openwakeword==0.6.0 # via -r requirements.in
packaging==25.0 # via onnxruntime
protobuf==5.29.5 # via onnxruntime
pycparser==2.22 # via cffi
requests==2.32.4 # via openwakeword
scikit-learn==1.3.2 # via openwakeword
scipy==1.10.1 # via openwakeword, scikit-learn
sounddevice==0.5.2 # via -r requirements.in
sympy==1.13.3 # via onnxruntime
tflite-runtime==2.14.0 # via openwakeword
threadpoolctl==3.5.0 # via scikit-learn
tqdm==4.67.1 # via openwakeword
urllib3==2.2.3 # via requests
10 changes: 10 additions & 0 deletions common/speech/lasr_wakewords/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env python3
# Setup script that builds its setup() arguments through catkin_pkg.
#
# NOTE(review): this looks like a ROS1 (catkin) leftover. It depends on
# catkin_pkg and distutils, and expects a ``lasr_wakewords`` Python package
# under ``src/`` - confirm that directory exists and that this script is
# still needed for the ament_cmake build of this package.

from distutils.core import setup
from catkin_pkg.python_setup import generate_distutils_setup

# Keyword arguments for setup(): the ``lasr_wakewords`` package, rooted at src/.
setup_args = generate_distutils_setup(
    packages=["lasr_wakewords"], package_dir={"": "src"}
)

setup(**setup_args)
53 changes: 53 additions & 0 deletions skills/src/lasr_skills/listen_for_wakeword.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from typing import List, Union

import smach
import smach_ros

from lasr_speech_recognition_interfaces.srv import Wakeword


class ListenForWakeword(smach.StateMachine):
    """State machine that calls the wakeword service once and reports the result.

    Outcomes are ``succeeded`` when the service response has ``success`` set
    and ``failed`` otherwise (including a preempted or aborted service call).
    ``keyword`` is declared as an output key of the state machine.
    """

    def __init__(
        self, wakeword: Union[str, List[str]], timeout: float, threshold: float
    ) -> None:
        """Build the request and the two internal states.

        Args:
            wakeword: A single keyword or a list of keywords to listen for.
            timeout: Value placed in the request's ``timeout`` field.
            threshold: Value placed in the request's ``threshold`` field.
        """
        super().__init__(outcomes=["succeeded", "failed"], output_keys=["keyword"])

        # A bare string is treated as a one-element keyword list.
        keywords = [wakeword] if isinstance(wakeword, str) else list(wakeword)

        # Build the ROS2 Wakeword.Request object once, up front.
        request = Wakeword.Request()
        request.keywords = keywords
        request.timeout = float(timeout)
        request.threshold = float(threshold)

        def determine_outcome(ud):
            # Map the service's boolean result onto a state outcome.
            if ud.success:
                return "succeeded"
            return "failed"

        # NOTE(review): confirm the ServiceState signature of the smach_ros
        # version in use; some ROS2 ports expect a node as the first argument.
        listen_state = smach_ros.ServiceState(
            "/lasr_wakewords/detect",
            Wakeword,
            request=request,
            response_slots=["success", "keyword"],
        )
        outcome_state = smach.CBState(
            determine_outcome,
            outcomes=["succeeded", "failed"],
            input_keys=["success"],
        )

        with self:
            smach.StateMachine.add(
                "LISTEN_FOR_WAKEWORD",
                listen_state,
                transitions={
                    "succeeded": "DETERMINE_OUTCOME",
                    "preempted": "failed",
                    "aborted": "failed",
                },
            )
            smach.StateMachine.add(
                "DETERMINE_OUTCOME",
                outcome_state,
                transitions={"succeeded": "succeeded", "failed": "failed"},
            )
Loading