fix(models): surface empty LiteLlm streaming completions as error event

kevin-hs-sohn · kevin-hs-sohn · commit 7240cdb6da48 · 2026-06-22T18:09:28.000-07:00
Streaming completions where the provider returns a finish_reason but no text + no tool calls currently produce ZERO yielded LlmResponse events: ``aggregated_llm_response`` only gets set when ``(text or reasoning_parts)`` is truthy, and ``aggregated_llm_response_with_tool_call`` needs a function_call. With neither, the loop simply exits and the downstream Runner observes a silent successful empty stream. This pattern is reported across multiple stalled fix attempts: * #5394 — AnthropicLlm never populates finish_reason on LlmResponse * #5006 — retry with resume message when model returns empty response * #5636 — surface error when model returns STOP with empty content * #3618 / #3699 — Handle empty message in LiteLLM response It hits providers under several real conditions: anthropic content_filter, gemini 2.5-flash-lite STOP-with-empty after tool calls, 0-token completions under safety, model_not_found responses normalized to stop, etc. From the user's perspective the agent "successfully" ends a turn with no visible output. Fix - Track ``last_finish_reason`` + ``last_model_version`` across the stream so we can attribute the empty response. - After both ``aggregated_llm_response`` and ``aggregated_llm_response_with_tool_call`` checks, if BOTH are None AND a finish_reason was observed, yield ONE LlmResponse with ``error_code`` set to the mapped finish_reason, ``error_message`` describing the failure mode, and the provider's ``model_version`` preserved. ``usage_metadata`` + ``grounding_metadata`` (if any) attach to that response so callers do not lose them. - Minimum-surface change: the guard only fires when the stream produced no aggregated response AND a finish_reason was observed. Streams that genuinely yield nothing (test doubles, empty iterators) stay byte-identical. Tests - tests/unittests/models/test_litellm.py adds 4 cases: * content_filter-empty → surfaces with SAFETY error_code * stop-empty → surfaces with STOP finish_reason + error_message * normal text stream → empty-guard does NOT fire (regression) * literally-empty stream (no chunks, no finish_reason) → byte-identical zero responses 281 lite_llm tests pass + 1 skip; 0 regressions.
diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py
@@ -2413,6 +2413,16 @@ async def generate_content_async(
       usage_metadata = None
       grounding_metadata = None
       fallback_index = 0
+      # Track the latest finish_reason + model so we can surface an
+      # explicit error event when the stream completes with neither text
+      # nor tool calls (silent-empty pattern). Without this guard the
+      # aggregator simply yields nothing and the downstream Runner sees
+      # zero events — the user-visible "empty turn that succeeded" bug
+      # tracked across #5394 / #5006 / #3618 / user reports against
+      # anthropic, gemini, and other providers under content_filter,
+      # safety, model_not_found, and 0-token-completion conditions.
+      last_finish_reason: str | None = None
+      last_model_version: str | None = None
 
       def _finalize_tool_call_response(
           *, model_version: str, finish_reason: str
@@ -2502,7 +2512,11 @@ def _reset_stream_buffers() -> None:
         part_grounding = _extract_grounding_metadata(part)
         if part_grounding:
           grounding_metadata = part_grounding
+        if getattr(part, "model", None):
+          last_model_version = part.model
         for chunk, finish_reason in _model_response_to_chunk(part):
+          if finish_reason:
+            last_finish_reason = finish_reason
           if isinstance(chunk, FunctionChunk):
             index = chunk.index or fallback_index
             if index not in function_calls:
@@ -2616,6 +2630,41 @@ def _reset_stream_buffers() -> None:
           )
         yield aggregated_llm_response_with_tool_call
 
+      # If we observed a finish_reason but produced no aggregated response
+      # (no text, no tool calls), surface it as an explicit error event so
+      # downstream consumers see actionable signal instead of a silent
+      # zero-yield stream. This is the "empty completion" pattern:
+      # provider returns 200 OK with a finish_reason (content_filter,
+      # safety, length-without-content, model_not_found, 0-token
+      # response, ...) but no text or tool deltas. Tracked across:
+      #   #5394 — AnthropicLlm never populates finish_reason
+      #   #5006 — retry with resume message when model returns empty
+      #   #5636 — surface error when model returns STOP with empty content
+      #   #3618 — Handle empty message in LiteLLM response
+      # Multiple stalled PRs (#5512, #5636, #3699) attempted variants of
+      # this fix. This guard keeps the change minimal: ONLY fires when
+      # the entire stream produced nothing meaningful AND the provider
+      # told us why via finish_reason. Mapped error_code lets the
+      # downstream agent loop react (retry, surface, escalate) without
+      # losing the provider's actual signal.
+      if (
+          last_finish_reason
+          and aggregated_llm_response is None
+          and aggregated_llm_response_with_tool_call is None
+      ):
+        mapped_empty_finish = _map_finish_reason(last_finish_reason)
+        empty_response = LlmResponse(
+            error_code=mapped_empty_finish,
+            error_message=_finish_reason_to_error_message(mapped_empty_finish),
+            finish_reason=mapped_empty_finish,
+            model_version=last_model_version,
+        )
+        if usage_metadata:
+          empty_response.usage_metadata = usage_metadata
+        if grounding_metadata:
+          empty_response.grounding_metadata = grounding_metadata
+        yield empty_response
+
     else:
       response = await self.llm_client.acompletion(**completion_args)
       yield _model_response_to_generate_content_response(response)
diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py
@@ -5275,3 +5275,147 @@ def test_redact_file_uri_for_log_http_url_keeps_scheme_and_tail():
       _redact_file_uri_for_log("https://example.com/path/file.pdf")
       == "https://<redacted>/file.pdf"
   )
+
+
+# ---------------------------------------------------------------------------
+# Empty-completion guard (fixes #5394 / #5006 / #5636 / #3618 family)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_streaming_empty_completion_with_content_filter_surfaces_error(
+    mock_completion, lite_llm_instance
+):
+  """A stream that ends with finish_reason but no text/tool deltas must
+  surface as an explicit error event, not a silent zero-yield stream.
+
+  Reproduces the "empty completion" pattern reported across
+  #5394 / #5006 / #5636 / #3618: provider returns 200 OK with a
+  finish_reason (content_filter / safety / model_not_found / etc.)
+  but no content. Pre-fix the aggregator simply yielded nothing and
+  the downstream Runner saw zero events — silent successful empty
+  turn from the user's perspective.
+  """
+  stream_chunks = [
+      ModelResponseStream(
+          model="anthropic/claude-opus-4-8",
+          choices=[
+              StreamingChoices(finish_reason="content_filter", delta=Delta())
+          ],
+      ),
+  ]
+  mock_completion.return_value = iter(stream_chunks)
+
+  responses = [
+      response
+      async for response in lite_llm_instance.generate_content_async(
+          LLM_REQUEST_WITH_FUNCTION_DECLARATION, stream=True
+      )
+  ]
+
+  assert len(responses) == 1, (
+      "empty completion with finish_reason must surface exactly one error "
+      f"LlmResponse, got {len(responses)}: {responses}"
+  )
+  empty_response = responses[0]
+  assert empty_response.error_code == types.FinishReason.SAFETY
+  assert empty_response.finish_reason == types.FinishReason.SAFETY
+  assert empty_response.error_message
+  # The model name must propagate so downstream consumers know what failed.
+  assert empty_response.model_version == "anthropic/claude-opus-4-8"
+
+
+@pytest.mark.asyncio
+async def test_streaming_empty_completion_with_stop_finish_reason_still_surfaces(
+    mock_completion, lite_llm_instance
+):
+  """A stream that ends with finish_reason='stop' but no content also
+  surfaces — the model said "I'm done" but produced nothing. This is the
+  most common shape of the silent-empty bug (#5636 specifically called
+  out gemini-2.5-flash-lite returning STOP with empty content after a
+  tool call). Pre-fix, the (text or reasoning_parts) guard skipped
+  finalization for this case and the aggregator yielded nothing.
+  """
+  stream_chunks = [
+      ModelResponseStream(
+          model="google/gemini-3.5-flash",
+          choices=[StreamingChoices(finish_reason="stop", delta=Delta())],
+      ),
+  ]
+  mock_completion.return_value = iter(stream_chunks)
+
+  responses = [
+      response
+      async for response in lite_llm_instance.generate_content_async(
+          LLM_REQUEST_WITH_FUNCTION_DECLARATION, stream=True
+      )
+  ]
+
+  assert len(responses) == 1, (
+      "STOP-with-empty stream must surface exactly one response, got "
+      f"{len(responses)}: {responses}"
+  )
+  empty_response = responses[0]
+  # The finish_reason is preserved so callers see the model's signal
+  # ("I stopped cleanly") + the empty body is also visible (no text
+  # content). error_message names the failure mode so the operator
+  # does not have to dig.
+  assert empty_response.finish_reason == types.FinishReason.STOP
+  assert empty_response.error_message
+  assert empty_response.model_version == "google/gemini-3.5-flash"
+
+
+@pytest.mark.asyncio
+async def test_streaming_text_response_does_not_trigger_empty_guard(
+    mock_completion, lite_llm_instance
+):
+  """Regression guard: a normal text completion must NOT trigger the
+  empty-guard — the existing aggregated_llm_response handles it and
+  exactly one response is yielded with the actual text."""
+  stream_chunks = [
+      ModelResponseStream(
+          choices=[
+              StreamingChoices(
+                  finish_reason=None,
+                  delta=Delta(role="assistant", content="hello"),
+              )
+          ]
+      ),
+      ModelResponseStream(
+          choices=[StreamingChoices(finish_reason="stop", delta=Delta())]
+      ),
+  ]
+  mock_completion.return_value = iter(stream_chunks)
+
+  responses = [
+      response
+      async for response in lite_llm_instance.generate_content_async(
+          LLM_REQUEST_WITH_FUNCTION_DECLARATION, stream=True
+      )
+  ]
+
+  # The text deltas yield partial responses + one final aggregated
+  # response; the empty-guard MUST NOT fire because text was produced.
+  assert len(responses) >= 2
+  final = responses[-1]
+  assert final.error_code is None
+  assert final.finish_reason == types.FinishReason.STOP
+
+
+@pytest.mark.asyncio
+async def test_streaming_no_chunks_and_no_finish_reason_is_byte_identical(
+    mock_completion, lite_llm_instance
+):
+  """The empty-guard ONLY fires when there's a finish_reason. A stream
+  that yields literally nothing (test doubles, etc.) must stay
+  byte-identical to pre-fix — zero responses, no synthesized error."""
+  mock_completion.return_value = iter([])
+
+  responses = [
+      response
+      async for response in lite_llm_instance.generate_content_async(
+          LLM_REQUEST_WITH_FUNCTION_DECLARATION, stream=True
+      )
+  ]
+
+  assert responses == []