openradx · medihack · Oct 10, 2025 · Copilot · Oct 10, 2025
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
@@ -35,6 +35,7 @@ x-llm: &llm
     LLAMA_ARG_ENDPOINT_METRICS: 1
     LLAMA_ARG_PORT: 8080
     LLAMA_ARG_N_GPU_LAYERS: 99
+    LLAMA_ARG_JINJA: 1
     NO_PROXY: ${NO_PROXY:-}
   volumes:
     - models_data:/models
@@ -86,13 +87,13 @@ services:
 
   llm_cpu:
     <<: *llm
-    image: ghcr.io/ggml-org/llama.cpp:server-b5170
+    image: ghcr.io/ggml-org/llama.cpp:server
     profiles:
       - cpu
 
   llm_gpu:
     <<: *llm
-    image: ghcr.io/ggml-org/llama.cpp:server-cuda-b5170
+    image: ghcr.io/ggml-org/llama.cpp:server-cuda
     deploy:
       resources:
         reservations:

diff --git a/pyproject.toml b/pyproject.toml
@@ -32,6 +32,7 @@ dependencies = [
     "djangorestframework>=3.15.2",
     "environs[django]>=14.1.1",
     "humanize>=4.12.1",
+    "instructor>=1.11.3",
     "Markdown>=3.7",
     "openai>=1.64.0",
     "openpyxl>=3.1.5",

diff --git a/radis/chats/utils/chat_client.py b/radis/chats/utils/chat_client.py
@@ -1,6 +1,7 @@
 import logging
 from typing import Iterable
 
+import instructor
 import openai
 from django.conf import settings
 from openai.types.chat import ChatCompletionMessageParam
@@ -49,20 +50,19 @@ def __init__(self) -> None:
         base_url = _get_base_url()
         api_key = settings.EXTERNAL_LLM_PROVIDER_API_KEY
 
-        self._client = openai.OpenAI(base_url=base_url, api_key=api_key)
+        client = openai.OpenAI(base_url=base_url, api_key=api_key)
+        self._client = instructor.from_openai(client)
         self._llm_model_name = settings.LLM_MODEL_NAME
 
     def extract_data(self, prompt: str, schema: type[BaseModel]) -> BaseModel:
         logger.debug("Sending prompt and schema to LLM to extract data.")
         logger.debug("Prompt:\n%s", prompt)
         logger.debug("Schema:\n%s", schema.model_json_schema())
 
-        completion = self._client.beta.chat.completions.parse(
+        result = self._client.chat.completions.create(
             model=self._llm_model_name,
             messages=[{"role": "system", "content": prompt}],
-            response_format=schema,
+            response_model=schema,
         )
-        event = completion.choices[0].message.parsed
-        assert event
-        logger.debug("Received from LLM: %s", event)
-        return event
+        logger.debug("Received from LLM: %s", result)
+        assert result is not None, "LLM returned None for extract_data"
+        return result