Commit e6600f6

Fix ollama support for Kodu when muxing (#1022)
Muxing was failing for two reasons: 1. The ollama provider sometimes returns chunks in OpenAI format, but we assumed everything it returned was in Ollama format. 2. The OpenAI-format chunks returned from the ollama provider carried an invalid `created` field.
1 parent 58512fd commit e6600f6
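
For context on the first failure: the muxing adapter assumed every chunk coming back from the ollama provider was a raw Ollama JSON object, but when talking to Cline or Kodu the provider can already emit OpenAI-style SSE lines prefixed with "data:". Below is a minimal, self-contained sketch of the two chunk shapes and the detection the fix relies on; the payload values are illustrative, not taken from a real stream.

import json

# Illustrative examples of the two shapes a streamed chunk string can take.
ollama_chunk = json.dumps(
    {
        "model": "llama3",
        "created_at": "2025-01-28T10:00:00.000000Z",  # ISO 8601 string
        "message": {"role": "assistant", "content": "Hello"},
        "done": False,
    }
)
openai_sse_chunk = (
    'data: {"id": "chatcmpl-123", "object": "chat.completion.chunk", '
    '"created": 1738058400, "choices": [{"index": 0, '
    '"delta": {"content": "Hello"}, "finish_reason": null}]}'
)

def looks_like_openai_sse(chunk: str) -> bool:
    # The fix keys off the SSE "data:" prefix to route such chunks to the
    # OpenAI formatter instead of failing the Ollama parse.
    return "data:" in chunk

for chunk in (ollama_chunk, openai_sse_chunk):
    print("openai-sse" if looks_like_openai_sse(chunk) else "ollama-json")

Running this prints "ollama-json" then "openai-sse", which mirrors the branch added to _format_ollama in the first diff below.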

File tree

2 files changed: +9 -23 lines

src/codegate/muxing/adapter.py

+6 -1
@@ -158,7 +158,12 @@ def _format_ollama(self, chunk: str) -> str:
             ollama_chunk = ChatResponse(**chunk_dict)
             open_ai_chunk = OLlamaToModel.normalize_chat_chunk(ollama_chunk)
             return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
-        except Exception:
+        except Exception as e:
+            # Sometimes we receive an OpenAI formatted chunk from ollama. Specifically when
+            # talking to Cline or Kodu. If that's the case we use the format_openai function.
+            if "data:" in chunk:
+                return self._format_openai(chunk)
+            logger.warning(f"Error formatting Ollama chunk: {chunk}. Error: {e}")
             return chunk
 
     def _format_antropic(self, chunk: str) -> str:

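For context on the second failure: Ollama reports timestamps as an ISO 8601 `created_at` string, while OpenAI-style chunks expect `created` to be an integer Unix timestamp. The hand-rolled dict removed in the next diff passed that string straight through as `created`, which is the invalid field mentioned in the commit message. A rough sketch of the conversion a normalizer such as OLlamaToModel.normalize_chat_chunk has to perform (this is an assumption about what the helper does, not its actual code, and the timestamp is made up):

from datetime import datetime

def created_from_ollama_timestamp(created_at: str) -> int:
    # Ollama: "2025-01-28T10:00:00.123456Z" -> OpenAI: integer Unix seconds.
    # datetime.fromisoformat() on Python < 3.11 rejects a trailing "Z",
    # so swap it for an explicit UTC offset first.
    dt = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
    return int(dt.timestamp())

print(created_from_ollama_timestamp("2025-01-28T10:00:00.123456Z"))  # 1738058400
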
src/codegate/providers/ollama/completion_handler.py

+3 -22
@@ -8,6 +8,7 @@
 
 from codegate.clients.clients import ClientType
 from codegate.providers.base import BaseCompletionHandler
+from codegate.providers.ollama.adapter import OLlamaToModel
 
 logger = structlog.get_logger("codegate")
 
@@ -24,29 +25,9 @@ async def ollama_stream_generator(  # noqa: C901
                 # the correct format and start to handle multiple clients
                 # in a more robust way.
                 if client_type in [ClientType.CLINE, ClientType.KODU]:
-                    # First get the raw dict from the chunk
                     chunk_dict = chunk.model_dump()
-                    # Create response dictionary in OpenAI-like format
-                    response = {
-                        "id": f"chatcmpl-{chunk_dict.get('created_at', '')}",
-                        "object": "chat.completion.chunk",
-                        "created": chunk_dict.get("created_at"),
-                        "model": chunk_dict.get("model"),
-                        "choices": [
-                            {
-                                "index": 0,
-                                "delta": {
-                                    "content": chunk_dict.get("message", {}).get("content", ""),
-                                    "role": chunk_dict.get("message", {}).get("role", "assistant"),
-                                },
-                                "finish_reason": (
-                                    chunk_dict.get("done_reason")
-                                    if chunk_dict.get("done", False)
-                                    else None
-                                ),
-                            }
-                        ],
-                    }
+                    model_response = OLlamaToModel.normalize_chat_chunk(chunk)
+                    response = model_response.model_dump()
                     # Preserve existing type or add default if missing
                     response["type"] = chunk_dict.get("type", "stream")
 