diff --git a/CHANGELOG.md b/CHANGELOG.md index 95f44ee..4f911e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Final releases will consolidate all intermediate changes in chronological order. * feat(tests): add coverage reporting with pytest-cov * feat(tests): add tests for v0.2.0 changes (utils coercions, config redaction, on_track_subscribed fix, new defaults) * build: add GitHub Actions workflow for running tests +* fix: handle "auto" locale to prevent invalid language code sent to Gladia ## v0.2.0 * feat(stt): support INTERIM transcriptions diff --git a/gladia_stt_agent.py b/gladia_stt_agent.py index 624795d..3969118 100644 --- a/gladia_stt_agent.py +++ b/gladia_stt_agent.py @@ -77,7 +77,7 @@ def start_transcription_for_user(self, user_id: str, locale: str, provider: str) return gladia_locale = self._sanitize_locale(locale) - stt_stream = self.stt.stream(language=gladia_locale) + stt_stream = self.stt.stream(language=gladia_locale) if gladia_locale else self.stt.stream() task = asyncio.create_task( self._run_transcription_pipeline(participant, track, stt_stream) ) @@ -105,7 +105,10 @@ def update_locale_for_user(self, user_id: str, locale: str): logging.info(f"Updating locale to '{locale}' for user {user_id}.") stream = self.processing_info[user_id]["stream"] gladia_locale = self._sanitize_locale(locale) - stream.update_options(languages=[gladia_locale]) + if gladia_locale: + stream.update_options(languages=[gladia_locale]) + else: + stream.update_options(languages=[]) else: logging.warning( f"Won't update locale, no active transcription for user {user_id}." @@ -169,10 +172,15 @@ def _find_audio_track(self, participant: rtc.RemoteParticipant) -> rtc.Track | N return pub.track return None - def _sanitize_locale(self, locale: str) -> str: + def _sanitize_locale(self, locale: str) -> str | None: # Gladia only accepts ISO 639-1 locales (e.g. "en") # BBB uses language-region format (e.g. "en-US") # Sanitization here is to ensure we use Gladia's format. 
+ # "auto" is not a valid ISO language code — returning None omits the + # language parameter so Gladia falls back to server-side auto-detection. + if locale.lower() == "auto": + return None + gladia_locale = locale.split("-")[0].lower() return gladia_locale diff --git a/main.py b/main.py index 482e0ad..fa752c1 100644 --- a/main.py +++ b/main.py @@ -104,7 +104,10 @@ async def on_final_transcript( ) return - original_lang = original_locale.split("-")[0] + # When locale is "auto", Gladia auto-detects — use the detected + # language from the transcript to resolve the BBB locale via the map. + is_auto = original_locale.lower() == "auto" + original_lang = None if is_auto else original_locale.split("-")[0] for alternative in event.alternatives: if _is_below_min_confidence( @@ -138,7 +141,7 @@ async def on_final_transcript( "alternative": alternative, }, ) - if transcript_lang == original_lang: + if not is_auto and transcript_lang == original_lang: # This is the original transcript, use the original BBB locale bbb_locale = original_locale else: @@ -181,7 +184,8 @@ async def on_interim_transcript( ) return - original_lang = original_locale.split("-")[0] + is_auto = original_locale.lower() == "auto" + original_lang = None if is_auto else original_locale.split("-")[0] min_utterance_length = p_settings.get("min_utterance_length", 0) for alternative in event.alternatives: @@ -238,7 +242,7 @@ async def on_interim_transcript( }, ) - if transcript_lang == original_lang: + if not is_auto and transcript_lang == original_lang: bbb_locale = original_locale else: bbb_locale = gladia_config.translation_lang_map.get(transcript_lang) diff --git a/tests/test_agent.py b/tests/test_agent.py index 6bd744c..dd5e493 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -68,6 +68,12 @@ def test_lowercases_language_code(self): assert agent._sanitize_locale("EN-US") == "en" assert agent._sanitize_locale("PT") == "pt" + def test_returns_none_for_auto(self): + agent = _make_agent() + assert 
agent._sanitize_locale("auto") is None + assert agent._sanitize_locale("Auto") is None + assert agent._sanitize_locale("AUTO") is None + class TestStopTranscriptionForUser: def test_cancels_task_and_removes_from_processing_info(self): @@ -105,6 +111,17 @@ def test_calls_stream_update_options_when_transcription_active(self): mock_stream.update_options.assert_called_once_with(languages=["de"]) + def test_sends_empty_languages_for_auto_locale(self): + """'auto' locale should send empty languages list to trigger Gladia auto-detection.""" + agent = _make_agent() + agent.participant_settings["user_1"] = {"locale": "en", "provider": "gladia"} + mock_stream = MagicMock() + agent.processing_info["user_1"] = {"stream": mock_stream, "task": MagicMock()} + + agent.update_locale_for_user("user_1", "auto") + + mock_stream.update_options.assert_called_once_with(languages=[]) + def test_sanitizes_bcp47_locale_for_stream_update(self): """update_locale_for_user should sanitize 'de-DE' → 'de' for the stream.""" agent = _make_agent() @@ -290,6 +307,21 @@ async def test_sanitizes_locale_before_creating_stream(self): with contextlib.suppress(asyncio.CancelledError): await agent.processing_info["user_1"]["task"] + async def test_omits_language_param_for_auto_locale(self): + """Locale 'auto' should call stream() with no language param for Gladia auto-detection.""" + mock_track = MagicMock() + mock_track.kind = rtc.TrackKind.KIND_AUDIO + participant = _make_participant("user_1", audio_track=mock_track) + agent = _make_agent_with_room(participants={"pid": participant}) + + with patch("gladia_stt_agent.rtc.AudioStream"): + agent.start_transcription_for_user("user_1", "auto", "gladia") + agent.stt.stream.assert_called_once_with() + + agent.processing_info["user_1"]["task"].cancel() + with contextlib.suppress(asyncio.CancelledError): + await agent.processing_info["user_1"]["task"] + class TestRunTranscriptionPipeline: async def test_cancellation_cleans_up_processing_info(self):