From d3f6ff17d45543c3dd71f6e7f0c8a859185c7a38 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 7 Feb 2026 02:39:40 +0000 Subject: [PATCH] feat(perf): add Parakeet model warmup for reduced first-run latency - Add `ParakeetManager.warmup()` to run dummy inference on startup. - Call `warmup()` in `ChirpApp` initialization. - Add unit test for warmup logic. This moves the ONNX Runtime initialization cost (buffer allocation, graph optimization) from the first user interaction to application startup, improving perceived responsiveness. Co-authored-by: Whamp <1115485+Whamp@users.noreply.github.com> --- .jules/bolt.md | 4 ++++ src/chirp/main.py | 1 + src/chirp/parakeet_manager.py | 9 +++++++++ tests/test_parakeet_manager.py | 25 +++++++++++++++++++++++++ 4 files changed, 39 insertions(+) diff --git a/.jules/bolt.md b/.jules/bolt.md index 96ecba4..93586f2 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -1,3 +1,7 @@ ## 2024-02-18 - Pre-scaled Audio Feedback **Learning:** AudioFeedback volume scaling was applied during every playback, causing unnecessary numpy overhead and latency. **Action:** Pre-calculate scaled audio during `_load_and_cache` to minimize `_play_cached` latency (from ~1ms to ~0.04ms). + +## 2024-05-22 - ONNX Model Warmup +**Learning:** The first inference pass with ONNX Runtime incurs a significant "cold start" penalty (buffer allocation, graph optimization), which increases user-perceived latency for the first dictation. +**Action:** Implemented `ParakeetManager.warmup()` to run a dummy inference during application startup, shifting this cost away from the user interaction loop. 
diff --git a/src/chirp/main.py b/src/chirp/main.py index 19ea709..80a74ec 100644 --- a/src/chirp/main.py +++ b/src/chirp/main.py @@ -72,6 +72,7 @@ def __init__(self, *, verbose: bool = False) -> None: model_dir=model_dir, timeout=self.config.model_timeout, ) + self.parakeet.warmup() except ModelNotPreparedError as exc: self.logger.error(str(exc)) raise SystemExit(1) from exc diff --git a/src/chirp/parakeet_manager.py b/src/chirp/parakeet_manager.py index 54667ef..9c41a4a 100644 --- a/src/chirp/parakeet_manager.py +++ b/src/chirp/parakeet_manager.py @@ -78,6 +78,15 @@ def ensure_loaded(self): self._model = self._load_model() return self._model + def warmup(self) -> None: + """Performs a dummy inference to initialize ONNX buffers.""" + self._logger.debug("Warming up Parakeet model...") + dummy_audio = np.zeros(16_000, dtype=np.float32) + try: + self.transcribe(dummy_audio) + except Exception as exc: + self._logger.warning("Warmup failed: %s", exc) + def _resolve_providers(self, key: str) -> Sequence[str]: normalized = key.lower() if normalized != "cpu": diff --git a/tests/test_parakeet_manager.py b/tests/test_parakeet_manager.py index 7986945..265198b 100644 --- a/tests/test_parakeet_manager.py +++ b/tests/test_parakeet_manager.py @@ -88,6 +88,31 @@ def test_transcribe_reloads_and_updates_time(self, mock_time, mock_onnx): manager._stop_monitor.set() time.sleep(0.05) + @patch("chirp.parakeet_manager.onnx_asr") + def test_warmup(self, mock_onnx): + """Test that warmup performs dummy inference.""" + mock_model_instance = MagicMock() + mock_onnx.load_model.return_value = mock_model_instance + + manager = ParakeetManager( + model_name="test", + quantization=None, + provider_key="cpu", + threads=1, + logger=self.logger, + model_dir=self.model_dir, + timeout=100.0, + ) + + manager.warmup() + + # Verify recognize was called with silent audio + mock_model_instance.recognize.assert_called_once() + args, _ = mock_model_instance.recognize.call_args + audio_arg = args[0] + 
self.assertEqual(audio_arg.shape, (16000,)) + self.assertTrue(np.all(audio_arg == 0)) + @patch("chirp.parakeet_manager.onnx_asr") def test_timeout_zero_disables_monitor(self, mock_onnx): """Test that timeout=0 disables the monitor thread."""