From 7f20a80dd3afd648d00fe1c63cf04a7ac3b0edc9 Mon Sep 17 00:00:00 2001
From: sborisov88 <sborisov88@gmail.com>
Date: Sun, 22 Feb 2026 03:10:14 +0300
Subject: [PATCH] Fix prompt tokens causing empty transcription output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When promptTokens are provided in DecodingOptions, the prefill cache is
disabled (known limitation). This causes the decoding loop to start at
tokenIndex=0, where startOfPreviousToken is fed to the model. The model
then predicts EOT or produces a low-confidence prediction, triggering
early termination checks (sampleResult.completed or
firstTokenLogProbThreshold) and breaking the loop immediately — resulting
in empty transcription text.

Two fixes:
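1. isFirstToken is now true at the first actually decoded token after
   the prompt (tokenIndex == max(prefilledIndex, initialPromptIndex))
   rather than at tokenIndex == prefilledIndex, which is 0 when the
   prefill cache is disabled and therefore fell inside prompt prefill.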
2. sampleResult.completed (EOT) is ignored while prompt tokens are still
   being fed (tokenIndex < initialPromptIndex, i.e. isPrefill or
   isLastPrefillToken), since the model is being force-fed prompt tokens
   and its predictions are not meaningful for early stopping.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 Sources/WhisperKit/Core/TextDecoder.swift | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Sources/WhisperKit/Core/TextDecoder.swift b/Sources/WhisperKit/Core/TextDecoder.swift
index 4d717b6e..9d7c9af8 100644
--- a/Sources/WhisperKit/Core/TextDecoder.swift
+++ b/Sources/WhisperKit/Core/TextDecoder.swift
@@ -764,7 +764,8 @@ open class TextDecoder: TextDecoding, WhisperMLModel {
 
             let isPrefill = tokenIndex < initialPromptIndex - 1 // Prefill stops at the last token of the initial prompt
             let isLastPrefillToken = tokenIndex == initialPromptIndex - 1
-            let isFirstToken = tokenIndex == prefilledIndex
+            let isInPrefillPhase = isPrefill || isLastPrefillToken // tokenIndex < initialPromptIndex
+            let isFirstToken = tokenIndex == max(prefilledIndex, initialPromptIndex) // First actually decoded token (after prompt)
 
             // Check if current index is part of the initial prompt
             if tokenIndex < initialPromptIndex {
@@ -854,8 +855,11 @@ open class TextDecoder: TextDecoding, WhisperMLModel {
                 } else {
                     false
                 }
+            // While prompt tokens are being force-fed (tokenIndex < initialPromptIndex), ignore a sampled EOT:
+            // - The model's predictions during this phase are not meaningful for early stopping
+            // - The max-context and first-token log prob checks below are left in place
             let isSegmentCompleted =
-                sampleResult.completed ||
+                (!isInPrefillPhase && sampleResult.completed) ||
                 currentTokens.count >= Constants.maxTokenContext - 1 ||
                 isFirstTokenLogProbTooLow