bigbluebutton · lrossillon-gladia · Mar 26, 2026 · Mar 26, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ Final releases will consolidate all intermediate changes in chronological order.
 * feat(tests): add coverage reporting with pytest-cov
 * feat(tests): add tests for v0.2.0 changes (utils coercions, config redaction, on_track_subscribed fix, new defaults)
 * build: add GitHub Actions workflow for running tests
+* fix: handle "auto" locale to prevent an invalid language code from being sent to Gladia
 ## v0.2.0
 
 * feat(stt): support INTERIM transcriptions

diff --git a/gladia_stt_agent.py b/gladia_stt_agent.py
@@ -77,7 +77,7 @@ def start_transcription_for_user(self, user_id: str, locale: str, provider: str)
             return
 
         gladia_locale = self._sanitize_locale(locale)
-        stt_stream = self.stt.stream(language=gladia_locale)
+        stt_stream = self.stt.stream(language=gladia_locale) if gladia_locale else self.stt.stream()
         task = asyncio.create_task(
             self._run_transcription_pipeline(participant, track, stt_stream)
         )
@@ -105,7 +105,10 @@ def update_locale_for_user(self, user_id: str, locale: str):
             logging.info(f"Updating locale to '{locale}' for user {user_id}.")
             stream = self.processing_info[user_id]["stream"]
             gladia_locale = self._sanitize_locale(locale)
-            stream.update_options(languages=[gladia_locale])
+            if gladia_locale:
+                stream.update_options(languages=[gladia_locale])
+            else:
+                stream.update_options(languages=[])
         else:
             logging.warning(
                 f"Won't update locale, no active transcription for user {user_id}."
@@ -169,10 +172,15 @@ def _find_audio_track(self, participant: rtc.RemoteParticipant) -> rtc.Track | N
                 return pub.track
         return None
 
-    def _sanitize_locale(self, locale: str) -> str:
+    def _sanitize_locale(self, locale: str) -> str | None:
         # Gladia only accepts ISO 639-1 locales (e.g. "en")
         # BBB uses <ISO 639-1>-<ISO 3166-1> format (e.g. "en-US")
         # Sanitization here is to ensure we use Gladia's format.
+        # "auto" is not a valid ISO language code — returning None tells callers
+        # to leave the language unset so Gladia uses server-side auto-detection.
+        if locale.lower() == "auto":
+            return None
+
         gladia_locale = locale.split("-")[0].lower()
 
         return gladia_locale

diff --git a/main.py b/main.py
@@ -104,7 +104,10 @@ async def on_final_transcript(
             )
             return
 
-        original_lang = original_locale.split("-")[0]
+        # When locale is "auto", Gladia auto-detects — use the detected
+        # language from the transcript to resolve the BBB locale via the map.
+        is_auto = original_locale.lower() == "auto"
+        original_lang = None if is_auto else original_locale.split("-")[0]
 
         for alternative in event.alternatives:
             if _is_below_min_confidence(
@@ -138,7 +141,7 @@ async def on_final_transcript(
                     "alternative": alternative,
                 },
             )
-            if transcript_lang == original_lang:
+            if not is_auto and transcript_lang == original_lang:
                 # This is the original transcript, use the original BBB locale
                 bbb_locale = original_locale
             else:
@@ -181,7 +184,8 @@ async def on_interim_transcript(
             )
             return
 
-        original_lang = original_locale.split("-")[0]
+        is_auto = original_locale.lower() == "auto"
+        original_lang = None if is_auto else original_locale.split("-")[0]
         min_utterance_length = p_settings.get("min_utterance_length", 0)
 
         for alternative in event.alternatives:
@@ -238,7 +242,7 @@ async def on_interim_transcript(
                 },
             )
 
-            if transcript_lang == original_lang:
+            if not is_auto and transcript_lang == original_lang:
                 bbb_locale = original_locale
             else:
                 bbb_locale = gladia_config.translation_lang_map.get(transcript_lang)

diff --git a/tests/test_agent.py b/tests/test_agent.py
@@ -68,6 +68,12 @@ def test_lowercases_language_code(self):
         assert agent._sanitize_locale("EN-US") == "en"
         assert agent._sanitize_locale("PT") == "pt"
 
+    def test_returns_none_for_auto(self):
+        agent = _make_agent()
+        assert agent._sanitize_locale("auto") is None
+        assert agent._sanitize_locale("Auto") is None
+        assert agent._sanitize_locale("AUTO") is None
+
 
 class TestStopTranscriptionForUser:
     def test_cancels_task_and_removes_from_processing_info(self):
@@ -105,6 +111,17 @@ def test_calls_stream_update_options_when_transcription_active(self):
 
         mock_stream.update_options.assert_called_once_with(languages=["de"])
 
+    def test_sends_empty_languages_for_auto_locale(self):
+        """'auto' locale should send empty languages list to trigger Gladia auto-detection."""
+        agent = _make_agent()
+        agent.participant_settings["user_1"] = {"locale": "en", "provider": "gladia"}
+        mock_stream = MagicMock()
+        agent.processing_info["user_1"] = {"stream": mock_stream, "task": MagicMock()}
+
+        agent.update_locale_for_user("user_1", "auto")
+
+        mock_stream.update_options.assert_called_once_with(languages=[])
+
     def test_sanitizes_bcp47_locale_for_stream_update(self):
         """update_locale_for_user should sanitize 'de-DE' → 'de' for the stream."""
         agent = _make_agent()
@@ -290,6 +307,21 @@ async def test_sanitizes_locale_before_creating_stream(self):
             with contextlib.suppress(asyncio.CancelledError):
                 await agent.processing_info["user_1"]["task"]
 
+    async def test_omits_language_param_for_auto_locale(self):
+        """Locale 'auto' should call stream() with no language param for Gladia auto-detection."""
+        mock_track = MagicMock()
+        mock_track.kind = rtc.TrackKind.KIND_AUDIO
+        participant = _make_participant("user_1", audio_track=mock_track)
+        agent = _make_agent_with_room(participants={"pid": participant})
+
+        with patch("gladia_stt_agent.rtc.AudioStream"):
+            agent.start_transcription_for_user("user_1", "auto", "gladia")
+            agent.stt.stream.assert_called_once_with()
+
+            agent.processing_info["user_1"]["task"].cancel()
+            with contextlib.suppress(asyncio.CancelledError):
+                await agent.processing_info["user_1"]["task"]
+
 
 class TestRunTranscriptionPipeline:
     async def test_cancellation_cleans_up_processing_info(self):