LASR-at-Home · larace27 · Nov 20, 2025
diff --git a/common/speech/lasr_speech_recognition_interfaces/CMakeLists.txt b/common/speech/lasr_speech_recognition_interfaces/CMakeLists.txt
@@ -21,6 +21,7 @@ rosidl_generate_interfaces(${PROJECT_NAME}
   "action/TranscribeSpeech.action"
   "msg/Transcription.msg"
   "srv/TranscribeAudio.srv"
+  "srv/Wakeword.srv"
   DEPENDENCIES builtin_interfaces # Add packages that above messages depend on
 )
 

diff --git a/common/speech/lasr_speech_recognition_interfaces/srv/Wakeword.srv b/common/speech/lasr_speech_recognition_interfaces/srv/Wakeword.srv
@@ -0,0 +1,6 @@
+string[] keywords   # keywords to detect; each must match a model file name (<keyword>.tflite)
+float32 timeout     # seconds to listen before giving up; <= 0 means no timeout
+float32 threshold   # score a keyword must exceed to count as detected
+---
+bool success        # true if any keyword was detected
+string keyword      # detected keyword; empty when none was detected
diff --git a/common/speech/lasr_wakewords/CMakeLists.txt b/common/speech/lasr_wakewords/CMakeLists.txt
@@ -0,0 +1,18 @@
+cmake_minimum_required(VERSION 3.5)
+project(lasr_wakewords)
+
+# ament_cmake package that only installs a Python node and its model files
+find_package(ament_cmake REQUIRED)
+
+# Install the Python node script as an executable of this package
+install(PROGRAMS
+  nodes/wakeword_service
+  DESTINATION lib/${PROJECT_NAME}
+)
+# Install the model files; the node looks them up in the package share directory
+install(DIRECTORY
+  models
+  DESTINATION share/${PROJECT_NAME}
+)
+
+ament_package()
diff --git a/common/speech/lasr_wakewords/models/hi_tiago.onnx b/common/speech/lasr_wakewords/models/hi_tiago.onnx
diff --git a/common/speech/lasr_wakewords/models/hi_tiago.tflite b/common/speech/lasr_wakewords/models/hi_tiago.tflite
diff --git a/common/speech/lasr_wakewords/models/no.onnx b/common/speech/lasr_wakewords/models/no.onnx
diff --git a/common/speech/lasr_wakewords/models/no.tflite b/common/speech/lasr_wakewords/models/no.tflite
diff --git a/common/speech/lasr_wakewords/models/yes.onnx b/common/speech/lasr_wakewords/models/yes.onnx
diff --git a/common/speech/lasr_wakewords/models/yes.tflite b/common/speech/lasr_wakewords/models/yes.tflite
diff --git a/common/speech/lasr_wakewords/nodes/wakeword_service b/common/speech/lasr_wakewords/nodes/wakeword_service
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+import os
+import time
+import threading
+
+import numpy as np
+import sounddevice as sd
+import openwakeword
+from openwakeword.model import Model
+
+import rclpy
+from rclpy.node import Node
+from ament_index_python.packages import get_package_share_directory
+
+from lasr_speech_recognition_interfaces.srv import Wakeword
+
+
+class WakewordDetector:
+    """Wraps the wakeword detection logic using openwakeword."""
+
+    def __init__(self, node: Node):
+        self._node = node
+        self._logger = node.get_logger()
+
+        # Declare parameters with defaults (matching ROS1)
+        self._node.declare_parameter("device_index", 9)
+        self._node.declare_parameter("sample_rate", 16000)
+        self._node.declare_parameter("frame_samples", 1280)
+
+        self._device_index = (
+            self._node.get_parameter("device_index").get_parameter_value().integer_value
+        )
+        self._sample_rate = (
+            self._node.get_parameter("sample_rate").get_parameter_value().integer_value
+        )
+        self._frame_samples = (
+            self._node.get_parameter("frame_samples")
+            .get_parameter_value()
+            .integer_value
+        )
+
+        # Model path: use the installed package share directory in ROS2
+        pkg_share = get_package_share_directory("lasr_wakewords")
+        self._model_path = os.path.join(pkg_share, "models")
+
+        self._logger.info(
+            f"WakewordDetector init: device_index={self._device_index}, "
+            f"sample_rate={self._sample_rate}, frame_samples={self._frame_samples}"
+        )
+        self._logger.info(f"Model path: {self._model_path}")
+
+    def detect(self, keywords, timeout: float, threshold: float):
+        """Run wakeword detection for one request.
+
+        Returns (success: bool, keyword: Optional[str]).
+        """
+        wakewords = list(keywords)
+        max_duration = timeout
+        detected = threading.Event()
+        detected_keyword = ""
+
+        self._logger.info(
+            f"Listening for wakewords: {wakewords}, "
+            f"threshold={threshold}, timeout={max_duration}s"
+        )
+
+        model_paths = [
+            os.path.join(self._model_path, f"{wakeword}.tflite")
+            for wakeword in wakewords
+        ]
+
+        try:
+            model = Model(model_paths)
+        except Exception as e:
+            self._logger.error(f"Failed to load model: {e}")
+            return False, None
+
+        def audio_callback(indata, frames, time_info, status):
+            nonlocal detected_keyword
+            if status:
+                self._logger.warn(f"Audio stream status: {status}")
+            # Convert float32 [-1,1] to int16 PCM like ROS1 code
+            pcm = (indata[:, 0] * 32768).astype(np.int16)
+            result = model.predict(pcm)
+            self._logger.info(str(result))
+            wakeword = max(result, key=lambda k: result[k])
+            score = result[wakeword]
+            if score > threshold:
+                self._logger.info(
+                    f"Wakeword '{wakeword}' detected (score={score:.3f})"
+                )
+                detected_keyword = wakeword
+                detected.set()
+
+        try:
+            with sd.InputStream(
+                device=self._device_index,
+                channels=1,
+                samplerate=self._sample_rate,
+                blocksize=self._frame_samples,
+                dtype="float32",
+                callback=audio_callback,
+            ):
+                start_time = time.monotonic()
+
+                while rclpy.ok() and not detected.is_set():
+                    elapsed = time.monotonic() - start_time
+                    if max_duration > 0 and elapsed >= max_duration:
+                        self._logger.info(
+                            f"Timeout reached after {elapsed:.1f} seconds"
+                        )
+                        break
+                    detected.wait(timeout=0.1)
+        except Exception as e:
+            self._logger.error(f"Error opening InputStream: {e}")
+            return False, None
+
+        # If we got here, success is True iff keyword set
+        return (detected_keyword != ""), (detected_keyword or None)
+
+
+class WakewordService(Node):
+    def __init__(self):
+        super().__init__("wakeword_service")
+
+        # Create the detector (loads params, models, etc.)
+        self.detector = WakewordDetector(self)
+
+        # Advertise the service
+        self.srv = self.create_service(
+            Wakeword,
+            "/lasr_wakewords/detect",
+            self.handle_detect,
+        )
+        self.get_logger().info("Wakeword service /lasr_wakewords/detect ready")
+
+    def handle_detect(self, request: Wakeword.Request, response: Wakeword.Response):
+        self.get_logger().info(
+            f"Wakeword request: keywords={list(request.keywords)}, "
+            f"timeout={request.timeout}, threshold={request.threshold}"
+        )
+
+        success, keyword = self.detector.detect(
+            keywords=request.keywords,
+            timeout=float(request.timeout),
+            threshold=float(request.threshold),
+        )
+
+        response.success = bool(success)
+        response.keyword = keyword or ""
+
+        return response
+
+
+def main(args=None):
+    # Downloaded the models 
+    openwakeword.utils.download_models()
+
+    rclpy.init(args=args)
+    node = WakewordService()
+    try:
+        rclpy.spin(node)
+    except KeyboardInterrupt:
+        pass
+    node.destroy_node()
+    rclpy.shutdown()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/common/speech/lasr_wakewords/package.xml b/common/speech/lasr_wakewords/package.xml
@@ -0,0 +1,62 @@
+<?xml version="1.0"?>
+<package format="3">
+  <name>lasr_wakewords</name>
+  <version>0.0.0</version>
+  <description>The lasr_wakewords package</description>
+
+  <!-- One maintainer tag required, multiple allowed, one person per tag -->
+  <!-- Example:  -->
+  <!-- <maintainer email="[email protected]">Jane Doe</maintainer> -->
+  <maintainer email="[email protected]">Siyao Li</maintainer>
+
+
+  <!-- One license tag required, multiple allowed, one license per tag -->
+  <!-- Commonly used license strings: -->
+  <!--   BSD, MIT, Boost Software License, GPLv2, GPLv3, LGPLv2.1, LGPLv3 -->
+  <license>MIT</license>
+
+
+  <!-- Url tags are optional, but multiple are allowed, one per tag -->
+  <!-- Optional attribute type can be: website, bugtracker, or repository -->
+  <!-- Example: -->
+  <!-- <url type="website">http://wiki.ros.org/lasr_speech_recognition_msgs</url> -->
+
+
+  <!-- Author tags are optional, multiple are allowed, one per tag -->
+  <!-- Authors do not have to be maintainers, but could be -->
+  <!-- Example: -->
+  <!-- <author email="[email protected]">Jane Doe</author> -->
+
+
+  <!-- The *depend tags are used to specify dependencies -->
+  <!-- Dependencies can be catkin packages or system dependencies -->
+  <!-- Examples: -->
+  <!-- Use depend as a shortcut for packages that are both build and exec dependencies -->
+  <!--   <depend>roscpp</depend> -->
+  <!--   Note that this is equivalent to the following: -->
+  <!--   <build_depend>roscpp</build_depend> -->
+  <!--   <exec_depend>roscpp</exec_depend> -->
+  <!-- Use build_depend for packages you need at compile time: -->
+  <!--   <build_depend>message_generation</build_depend> -->
+  <!-- Use build_export_depend for packages you need in order to build against this package: -->
+  <!--   <build_export_depend>message_generation</build_export_depend> -->
+  <!-- Use buildtool_depend for build tool packages: -->
+  <!--   <buildtool_depend>catkin</buildtool_depend> -->
+  <!-- Use exec_depend for packages you need at runtime: -->
+  <!--   <exec_depend>message_runtime</exec_depend> -->
+  <!-- Use test_depend for packages you need only for testing: -->
+  <!--   <test_depend>gtest</test_depend> -->
+  <!-- Use doc_depend for packages you need only for building documentation: -->
+  <!--   <doc_depend>doxygen</doc_depend> -->
+ <!-- ROS2 build system -->
+ <buildtool_depend>ament_cmake</buildtool_depend>
+
+ <!-- Runtime dependencies -->
+ <exec_depend>rclpy</exec_depend>
+ <exec_depend>ament_index_python</exec_depend>
+ <exec_depend>lasr_speech_recognition_interfaces</exec_depend>
+ <export>
+    <build_type>ament_cmake</build_type>
+ </export>
+</package>
+
diff --git a/common/speech/lasr_wakewords/requirements.in b/common/speech/lasr_wakewords/requirements.in
@@ -0,0 +1,2 @@
+openwakeword
+sounddevice
diff --git a/common/speech/lasr_wakewords/requirements.txt b/common/speech/lasr_wakewords/requirements.txt
@@ -0,0 +1,24 @@
+certifi==2025.6.15        # via requests
+cffi==1.17.1              # via sounddevice
+charset-normalizer==3.4.2  # via requests
+coloredlogs==15.0.1       # via onnxruntime
+flatbuffers==25.2.10      # via onnxruntime
+humanfriendly==10.0       # via coloredlogs
+idna==3.10                # via requests
+joblib==1.4.2             # via scikit-learn
+mpmath==1.3.0             # via sympy
+numpy==1.24.4             # via onnxruntime, scikit-learn, scipy, tflite-runtime
+onnxruntime==1.19.2       # via openwakeword
+openwakeword==0.6.0       # via -r requirements.in
+packaging==25.0           # via onnxruntime
+protobuf==5.29.5          # via onnxruntime
+pycparser==2.22           # via cffi
+requests==2.32.4          # via openwakeword
+scikit-learn==1.3.2       # via openwakeword
+scipy==1.10.1             # via openwakeword, scikit-learn
+sounddevice==0.5.2        # via -r requirements.in
+sympy==1.13.3             # via onnxruntime
+tflite-runtime==2.14.0    # via openwakeword
+threadpoolctl==3.5.0      # via scikit-learn
+tqdm==4.67.1              # via openwakeword
+urllib3==2.2.3            # via requests
diff --git a/common/speech/lasr_wakewords/setup.py b/common/speech/lasr_wakewords/setup.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+# NOTE(review): catkin-style script, but CMakeLists.txt uses ament_cmake - confirm.
+from distutils.core import setup
+from catkin_pkg.python_setup import generate_distutils_setup
+# Declares the "lasr_wakewords" Python package, looked up under src/.
+setup_args = generate_distutils_setup(
+    packages=["lasr_wakewords"], package_dir={"": "src"}
+)
+
+setup(**setup_args)
diff --git a/skills/src/lasr_skills/listen_for_wakeword.py b/skills/src/lasr_skills/listen_for_wakeword.py
@@ -0,0 +1,53 @@
+from typing import List, Union
+
+import smach
+import smach_ros
+
+from lasr_speech_recognition_interfaces.srv import Wakeword
+
+
+class ListenForWakeword(smach.StateMachine):
+
+    def __init__(
+        self, wakeword: Union[str, List[str]], timeout: float, threshold: float
+    ) -> None:
+
+        super(ListenForWakeword, self).__init__(
+            outcomes=["succeeded", "failed"], output_keys=["keyword"]
+        )
+
+        # Normalise to list
+        if isinstance(wakeword, str):
+            wakeword = [wakeword]
+
+        # Built a ROS2 Wakeword.Request object 
+        request = Wakeword.Request()
+        request.keywords = list(wakeword)
+        request.timeout = float(timeout)
+        request.threshold = float(threshold)
+
+        with self:
+            smach.StateMachine.add(
+                "LISTEN_FOR_WAKEWORD",
+                smach_ros.ServiceState(
+                    "/lasr_wakewords/detect",
+                    Wakeword,
+                    request=request,
+                    response_slots=["success", "keyword"],
+                ),
+                transitions={
+                    "succeeded": "DETERMINE_OUTCOME",
+                    "preempted": "failed",
+                    "aborted": "failed",
+                },
+            )
+
+            smach.StateMachine.add(
+                "DETERMINE_OUTCOME",
+                smach.CBState(
+                    lambda ud: "succeeded" if ud.success else "failed",
+                    outcomes=["succeeded", "failed"],
+                    input_keys=["success"],
+                ),
+                transitions={"succeeded": "succeeded", "failed": "failed"},
+            )