26 changes: 26 additions & 0 deletions .env.example
@@ -6,6 +6,12 @@ REDIS_HOST=127.0.0.1
REDIS_PORT=6789
REDIS_PASSWORD=

# STT provider: "gladia" (default) or "openai"
#STT_PROVIDER=gladia

# =============================================================================
# --- Gladia STT (STT_PROVIDER=gladia) ---
# =============================================================================
GLADIA_API_KEY=
# The following env vars serve as a translation locale mapper between
# <ISO 639-1> (Gladia) and <ISO 639-1>-<ISO 3166-1> (BBB) locale formats.
@@ -55,3 +61,23 @@ GLADIA_TRANSLATION_LANG_MAP="de:de-DE,en:en-US,es:es-ES,fr:fr-FR,hi:hi-IN,it:it-

#GLADIA_PRE_PROCESSING_AUDIO_ENHANCER=false
#GLADIA_PRE_PROCESSING_SPEECH_THRESHOLD=0.5

# =============================================================================
# --- OpenAI STT (STT_PROVIDER=openai) ---
# Supports the official OpenAI API and any OpenAI-compatible endpoint.
# =============================================================================

# OpenAI API key (required)
#OPENAI_API_KEY=

# Transcription model (default: gpt-4o-transcribe; use "whisper-1" for classic Whisper)
#OPENAI_STT_MODEL=gpt-4o-transcribe

# Base URL override — set this to use a compatible provider (e.g. a local Whisper server)
#OPENAI_BASE_URL=

#OPENAI_INTERIM_RESULTS=false

# Minimum confidence thresholds (OpenAI does not report confidence; default 0.0 = no filtering)
#OPENAI_MIN_CONFIDENCE_FINAL=0.0
#OPENAI_MIN_CONFIDENCE_INTERIM=0.0
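The `GLADIA_TRANSLATION_LANG_MAP` value above is a comma-separated list of `<gladia>:<bbb>` locale pairs. A minimal sketch of how such a value could be parsed (illustrative only, not the project's actual loader):

```python
def parse_lang_map(raw: str) -> dict[str, str]:
    """Parse 'de:de-DE,en:en-US' into {'de': 'de-DE', 'en': 'en-US'}."""
    mapping: dict[str, str] = {}
    for pair in raw.split(","):
        pair = pair.strip()
        if not pair:
            continue
        # Split on the first ":" only, so region-qualified values survive intact.
        gladia_locale, _, bbb_locale = pair.partition(":")
        mapping[gladia_locale.strip()] = bbb_locale.strip()
    return mapping
```

An empty or unset value simply yields an empty mapping, so no translation locales are remapped.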
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@ Final releases will consolidate all intermediate changes in chronological order.

## UNRELEASED

* feat(openai): add OpenAI STT provider support (official and compatible endpoints)
* feat(tests): add unit and integration tests with pytest
* feat(tests): add coverage reporting with pytest-cov
* feat(tests): add tests for v0.2.0 changes (utils coercions, config redaction, on_track_subscribed fix, new defaults)
50 changes: 43 additions & 7 deletions README.md
@@ -3,18 +3,18 @@
This application provides Speech-to-Text (STT) for BigBlueButton meetings, using LiveKit
as the audio bridge.

Initially, the only supported STT engine is Gladia through the official [LiveKit Gladia Plugin](https://docs.livekit.io/agents/integrations/stt/gladia/).
Supported STT engines:

It'll be expanded in the future to support other STT plugins from the LiveKit Agents
ecosystem.
- **Gladia** — via the official [LiveKit Gladia plugin](https://docs.livekit.io/agents/integrations/stt/gladia/) (default)
- **OpenAI** — via the [LiveKit OpenAI plugin](https://docs.livekit.io/agents/models/stt/openai/); supports the official OpenAI API and any OpenAI-compatible endpoint

## Getting Started

### Environment prerequisites

- Python 3.10+
- A LiveKit instance
- A Gladia API key
- A Gladia API key **or** an OpenAI API key (depending on your chosen STT provider)
- uv:
- See installation instructions: https://docs.astral.sh/uv/getting-started/installation/

@@ -48,13 +48,17 @@ ecosystem.
LIVEKIT_API_KEY=...
LIVEKIT_API_SECRET=...

# Gladia API Key
# For Gladia (default provider):
GLADIA_API_KEY=...

# For OpenAI (set STT_PROVIDER=openai):
# STT_PROVIDER=openai
# OPENAI_API_KEY=...
```

Feel free to check `.env.example` for any other configurations of interest.

**All options ingested by the Gladia STT plugin are exposed via env vars**.
**All options ingested by the Gladia and OpenAI STT plugins are exposed via env vars**.

### Running

@@ -98,6 +102,30 @@ docker run --network host --rm -it --env-file .env bbb-livekit-stt

Pre-built images are available via GitHub Container Registry as well.

### OpenAI STT provider

Set `STT_PROVIDER=openai` to use OpenAI STT instead of Gladia.

**Official OpenAI API:**

```bash
STT_PROVIDER=openai
OPENAI_API_KEY=your-key
# OPENAI_STT_MODEL=gpt-4o-transcribe # default; use "whisper-1" for classic Whisper
```

**OpenAI-compatible endpoint** (e.g. a self-hosted Whisper server):

```bash
STT_PROVIDER=openai
OPENAI_API_KEY=any-value
OPENAI_BASE_URL=http://your-server:8000
OPENAI_STT_MODEL=your-model-name
```

> **Note**: OpenAI STT does not support real-time translation. Only the original
> transcript language is returned, matching the user's BBB speech locale.

### Development

#### Testing
@@ -114,12 +142,20 @@ Run with coverage:
uv run pytest tests/ --ignore=tests/integration --cov --cov-report=term-missing
```

Integration tests require a real Gladia API key and make live requests to the Gladia service. Set `GLADIA_API_KEY` and run:
Integration tests require a real API key and make live requests to the STT service.

For Gladia, set `GLADIA_API_KEY` and run:

```bash
GLADIA_API_KEY=your-key uv run pytest tests/integration -m integration
```

For OpenAI, set `OPENAI_API_KEY` and run:

```bash
OPENAI_API_KEY=your-key uv run pytest tests/integration -m integration
```

#### Linting

This project uses [ruff](https://docs.astral.sh/ruff/) for linting and formatting. To check for issues:
31 changes: 31 additions & 0 deletions config.py
@@ -223,6 +223,35 @@ def to_dict(self):
gladia_config = GladiaConfig()


@dataclass
class OpenAiConfig:
api_key: str | None = field(default_factory=lambda: os.getenv("OPENAI_API_KEY"))
model: str = field(
default_factory=lambda: os.getenv("OPENAI_STT_MODEL", "gpt-4o-transcribe")
)
base_url: str | None = field(
default_factory=lambda: os.getenv("OPENAI_BASE_URL", None)
)
# OpenAI STT does not return confidence scores; default 0.0 disables filtering
min_confidence_final: float = field(
default_factory=lambda: _get_float_env("OPENAI_MIN_CONFIDENCE_FINAL", 0.0)
)
min_confidence_interim: float = field(
default_factory=lambda: _get_float_env("OPENAI_MIN_CONFIDENCE_INTERIM", 0.0)
)
interim_results: bool | None = field(
default_factory=lambda: _get_bool_env("OPENAI_INTERIM_RESULTS", None)
)

def to_dict(self):
data = {"api_key": self.api_key, "model": self.model, "base_url": self.base_url}
return {k: v for k, v in data.items() if v is not None}


openai_config = OpenAiConfig()
stt_provider = os.getenv("STT_PROVIDER", "gladia").lower()


def redact_config_values(value: object, key: str | None = None) -> object:
if key and key.lower() in REDACTED_CONFIG_KEYS:
return "***REDACTED***" if value not in (None, "") else value
@@ -238,7 +267,9 @@ def redact_config_values(value: object, key: str | None = None) -> object:

def get_redacted_app_config() -> Dict[str, Any]:
config_payload = {
"stt_provider": stt_provider,
"redis": asdict(redis_config),
"gladia": asdict(gladia_config),
"openai": asdict(openai_config),
}
return redact_config_values(config_payload)
33 changes: 24 additions & 9 deletions main.py
@@ -10,7 +10,13 @@

from redis_manager import RedisManager
from gladia_stt_agent import GladiaSttAgent
from config import get_redacted_app_config, gladia_config, redis_config
from config import (
get_redacted_app_config,
gladia_config,
openai_config,
redis_config,
stt_provider,
)
from utils import coerce_min_utterance_length_seconds, coerce_partial_utterances

load_dotenv()
@@ -34,7 +40,14 @@ async def entrypoint(ctx: JobContext):
_log_startup_configuration()

redis_manager = RedisManager(redis_config)
agent = GladiaSttAgent(gladia_config)
if stt_provider == "openai":
from openai_stt_agent import OpenAiSttAgent

agent = OpenAiSttAgent(openai_config)
active_stt_config = openai_config
else:
agent = GladiaSttAgent(gladia_config)
active_stt_config = gladia_config

async def on_redis_message(message_data: str):
try:
@@ -54,7 +67,7 @@ async def on_redis_message(message_data: str):
meeting_id = routing.get("meetingId")
user_id = routing.get("userId")

if meeting_id != agent.room.name:
if agent.room is None or meeting_id != agent.room.name:
return

if event_name == RedisManager.USER_SPEECH_LOCALE_CHANGED_EVT_MSG:
@@ -108,15 +121,16 @@ async def on_final_transcript(

for alternative in event.alternatives:
if _is_below_min_confidence(
alternative, gladia_config.min_confidence_final
alternative, active_stt_config.min_confidence_final
):
logging.debug(
f"Discarding final transcript for {participant.identity}: "
f"low confidence ({alternative.confidence} < {gladia_config.min_confidence_final})."
f"low confidence ({alternative.confidence} < {active_stt_config.min_confidence_final})."
)
continue

transcript_lang = alternative.language
# OpenAI STT may not report a language; fall back to the original lang.
transcript_lang = alternative.language or original_lang
text = alternative.text
bbb_locale = None
start_time_adjusted = math.floor(open_time + alternative.start_time)
@@ -186,15 +200,16 @@ async def on_interim_transcript(

for alternative in event.alternatives:
if _is_below_min_confidence(
alternative, gladia_config.min_confidence_interim
alternative, active_stt_config.min_confidence_interim
):
logging.debug(
f"Discarding interim transcript for {participant.identity}: "
f"low confidence ({alternative.confidence} < {gladia_config.min_confidence_interim})."
f"low confidence ({alternative.confidence} < {active_stt_config.min_confidence_interim})."
)
continue

transcript_lang = alternative.language
# OpenAI STT may not report a language; fall back to the original lang.
transcript_lang = alternative.language or original_lang
text = alternative.text
start_time_adjusted = math.floor(open_time + alternative.start_time)
end_time_adjusted = math.floor(open_time + alternative.end_time)