Skip to content
10 changes: 10 additions & 0 deletions backend/app/api/docs/llm/get_llm_call.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Retrieve the status and results of an LLM call job by job ID.

This endpoint allows you to poll for the status and results of an asynchronous LLM call job that was previously initiated via the POST `/llm/call` endpoint.


### Notes

- This endpoint returns the job status, and also the full LLM response once the job has completed successfully
- LLM responses are also delivered asynchronously via the callback URL (if provided)
- Jobs can be queried at any time after creation
105 changes: 97 additions & 8 deletions backend/app/api/routes/llm.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
import logging
from uuid import UUID

from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, HTTPException

from app.api.deps import AuthContextDep, SessionDep
from app.api.permissions import Permission, require_permission
from app.models import LLMCallRequest, LLMCallResponse, Message
from app.crud.jobs import JobCrud
from app.crud.llm import get_llm_calls_by_job_id
from app.models import (
LLMCallRequest,
LLMCallResponse,
LLMJobImmediatePublic,
LLMJobPublic,
JobStatus,
)
from app.models.llm.response import LLMResponse, Usage
from app.services.llm.jobs import start_job
from app.utils import APIResponse, validate_callback_url, load_description

Expand Down Expand Up @@ -34,7 +44,7 @@ def llm_callback_notification(body: APIResponse[LLMCallResponse]):
@router.post(
"/llm/call",
description=load_description("llm/llm_call.md"),
response_model=APIResponse[Message],
response_model=APIResponse[LLMJobImmediatePublic],
callbacks=llm_callback_router.routes,
dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
)
Expand All @@ -43,22 +53,101 @@ def llm_call(
):
"""
Endpoint to initiate an LLM call as a background job.
Returns job information for polling.
"""
project_id = _current_user.project_.id
organization_id = _current_user.organization_.id

if request.callback_url:
validate_callback_url(str(request.callback_url))

start_job(
job_id = start_job(
db=session,
request=request,
project_id=project_id,
organization_id=organization_id,
)

return APIResponse.success_response(
data=Message(
message=f"Your response is being generated and will be delivered via callback."
),
# Fetch job details to return immediate response
job_crud = JobCrud(session=session)
job = job_crud.get(job_id=job_id)

if not job:
raise HTTPException(status_code=404, detail="Job not found")

if request.callback_url:
message = "Your response is being generated and will be delivered via callback."
else:
message = "Your response is being generated"

job_response = LLMJobImmediatePublic(
job_id=job.id,
status=job.status.value,
message=message,
job_inserted_at=job.created_at,
job_updated_at=job.updated_at,
)

return APIResponse.success_response(data=job_response)


@router.get(
    "/llm/call/{job_id}",
    description=load_description("llm/get_llm_call.md"),
    response_model=APIResponse[LLMJobPublic],
    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
)
def get_llm_call_status(
    _current_user: AuthContextDep,
    session: SessionDep,
    job_id: UUID,
) -> APIResponse[LLMJobPublic]:
    """
    Poll for LLM call job status and results.

    Returns the job's current status; once the job has completed
    successfully, the response also carries the nested LLM response.
    While the job is still running or has failed, ``llm_response`` is
    ``None`` and ``error_message`` holds any failure detail.

    Raises:
        HTTPException: 404 if no job exists for ``job_id``; 500 if a
            successfully completed job is missing its usage data.
    """
    job = JobCrud(session=session).get(job_id=job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    llm_call_response = None
    # Compare enum members directly rather than `.value`, so the check is
    # correct whether JobStatus is a plain Enum or a str-mixin enum.
    if job.status == JobStatus.SUCCESS:
        llm_calls = get_llm_calls_by_job_id(
            session=session, job_id=job_id, project_id=_current_user.project_.id
        )

        if llm_calls:
            # A single-call job produces exactly one LlmCall row; this
            # endpoint does not serve LLM chains, so the first row is the
            # only one we need.
            llm_call = llm_calls[0]

            if not llm_call.usage:
                # Usage is persisted alongside the provider response, so a
                # SUCCESS job without it indicates corrupted state.
                raise HTTPException(
                    status_code=500,
                    detail="Completed LLM job is missing usage data",
                )

            llm_call_response = LLMCallResponse(
                response=LLMResponse(
                    provider_response_id=llm_call.provider_response_id or "",
                    conversation_id=llm_call.conversation_id,
                    provider=llm_call.provider,
                    model=llm_call.model,
                    output=llm_call.content,
                ),
                usage=Usage(**llm_call.usage),
                provider_raw_response=None,
            )

    return APIResponse.success_response(
        data=LLMJobPublic(
            job_id=job.id,
            status=job.status.value,
            llm_response=llm_call_response,
            error_message=job.error_message,
        )
    )
4 changes: 2 additions & 2 deletions backend/app/crud/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,13 @@ def get_llm_call_by_id(


def get_llm_calls_by_job_id(
session: Session,
job_id: UUID,
session: Session, job_id: UUID, project_id: int
) -> list[LlmCall]:
statement = (
select(LlmCall)
.where(
LlmCall.job_id == job_id,
LlmCall.project_id == project_id,
LlmCall.deleted_at.is_(None),
)
.order_by(LlmCall.created_at.desc())
Expand Down
3 changes: 3 additions & 0 deletions backend/app/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@
LLMChainRequest,
LLMChainResponse,
LlmChain,
LLMJobBasePublic,
LLMJobImmediatePublic,
LLMJobPublic,
)

from .message import Message
Expand Down
3 changes: 3 additions & 0 deletions backend/app/models/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,7 @@
AudioOutput,
LLMChainResponse,
IntermediateChainResponse,
LLMJobBasePublic,
LLMJobImmediatePublic,
LLMJobPublic,
)
26 changes: 26 additions & 0 deletions backend/app/models/llm/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

This module contains structured response models for LLM API calls.
"""
from datetime import datetime
from uuid import UUID

from sqlmodel import SQLModel, Field
from typing import Literal, Annotated
from app.models.llm.request import AudioContent, TextContent
Expand Down Expand Up @@ -100,3 +103,26 @@ class IntermediateChainResponse(SQLModel):
default=None,
description="Unmodified raw response from the LLM provider from the current block",
)


# Job response models
class LLMJobBasePublic(SQLModel):
    """Base response model for LLM job information."""

    job_id: UUID  # identifier of the background job tracking this LLM call
    status: str  # JobStatus from job.py, serialized as its string value


class LLMJobImmediatePublic(LLMJobBasePublic):
    """Immediate response after creating an LLM job.

    Returned right after the job is queued, before any LLM output exists.
    """

    message: str  # human-readable hint about how/when results are delivered
    job_inserted_at: datetime  # when the job row was created
    job_updated_at: datetime  # when the job row was last modified


class LLMJobPublic(LLMJobBasePublic):
    """Full job response with nested LLM response when complete.

    Returned by GET /llm/call/{job_id}; ``llm_response`` is populated only
    once the job has finished successfully.
    """

    llm_response: LLMCallResponse | None = None  # present only on success
    error_message: str | None = None  # failure detail for failed jobs
140 changes: 139 additions & 1 deletion backend/app/tests/api/routes/test_llm.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import pytest
from uuid import uuid4
from unittest.mock import patch

from sqlmodel import Session
from fastapi.testclient import TestClient

from app.models import LLMCallRequest
from app.crud import JobCrud
from app.crud.llm import create_llm_call, update_llm_call_response
from app.models import JobType, LLMCallRequest, Job, JobStatus, JobUpdate
from app.models.llm.response import LLMCallResponse
from app.models.llm.request import (
QueryParams,
LLMCallConfig,
Expand All @@ -12,6 +18,52 @@
)


@pytest.fixture
def llm_job(db: Session) -> Job:
    """Create a fresh LLM_API job row; tests assume its initial status is PENDING."""
    crud = JobCrud(db)
    return crud.create(job_type=JobType.LLM_API)


@pytest.fixture
def llm_response_in_db(db: Session, llm_job, user_api_key) -> LLMCallResponse:
    """Persist a completed LLM call (with usage data) attached to ``llm_job``.

    NOTE(review): this returns the row created by ``create_llm_call`` (an
    LlmCall), not an ``LLMCallResponse`` — the return annotation looks
    wrong; confirm against the crud layer.
    """
    # Minimal OpenAI text-completion config, reused as both the request
    # blob and the resolved config.
    config_blob = ConfigBlob(
        completion=KaapiCompletionConfig(
            provider="openai",
            params={
                "model": "gpt-4o",
                "instructions": "You are helpful.",
                "temperature": 0.7,
            },
            type="text",
        )
    )
    llm_call = create_llm_call(
        db,
        request=LLMCallRequest(
            query=QueryParams(input="What is the capital of France?"),
            config=LLMCallConfig(blob=config_blob),
        ),
        job_id=llm_job.id,
        project_id=user_api_key.project_id,
        organization_id=user_api_key.organization_id,
        resolved_config=config_blob,
        original_provider="openai",
    )
    # Mark the call as answered: provider response id, content, and the
    # usage payload that the GET endpoint requires for SUCCESS jobs.
    update_llm_call_response(
        db,
        llm_call_id=llm_call.id,
        provider_response_id="resp_abc123",
        content={"type": "text", "content": {"format": "text", "value": "Paris"}},
        usage={
            "input_tokens": 10,
            "output_tokens": 5,
            "total_tokens": 15,
            "reasoning_tokens": None,
        },
    )
    return llm_call


def test_llm_call_success(
client: TestClient, user_api_key_header: dict[str, str]
) -> None:
Expand Down Expand Up @@ -247,3 +299,89 @@ def test_llm_call_guardrails_bypassed_still_succeeds(
assert "response is being generated" in body["data"]["message"]

mock_start_job.assert_called_once()


def test_get_llm_call_pending(
    client: TestClient,
    user_api_key_header: dict[str, str],
    llm_job,
) -> None:
    """A freshly created job reports PENDING and carries no llm_response."""
    resp = client.get(
        f"/api/v1/llm/call/{llm_job.id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["success"] is True
    data = payload["data"]
    assert data["job_id"] == str(llm_job.id)
    assert data["status"] == "PENDING"
    assert data["llm_response"] is None


def test_get_llm_call_success(
    client: TestClient,
    db: Session,
    user_api_key_header: dict[str, str],
    llm_job,
    llm_response_in_db,
) -> None:
    """A SUCCESS job returns the full llm_response, including usage."""
    JobCrud(db).update(llm_job.id, JobUpdate(status=JobStatus.SUCCESS))

    resp = client.get(
        f"/api/v1/llm/call/{llm_job.id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["success"] is True
    result = payload["data"]
    assert result["status"] == "SUCCESS"
    llm = result["llm_response"]
    assert llm is not None
    assert llm["response"]["provider_response_id"] == "resp_abc123"
    assert llm["response"]["provider"] == "openai"
    expected_usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
    for field, expected in expected_usage.items():
        assert llm["usage"][field] == expected


def test_get_llm_call_failed(
    client: TestClient,
    db: Session,
    user_api_key_header: dict[str, str],
    llm_job,
) -> None:
    """A FAILED job surfaces its error message and no llm_response."""
    JobCrud(db).update(
        llm_job.id,
        JobUpdate(status=JobStatus.FAILED, error_message="Provider timeout"),
    )

    resp = client.get(
        f"/api/v1/llm/call/{llm_job.id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["success"] is True
    result = payload["data"]
    assert result["status"] == "FAILED"
    assert result["error_message"] == "Provider timeout"
    assert result["llm_response"] is None


def test_get_llm_call_not_found(
    client: TestClient,
    user_api_key_header: dict[str, str],
) -> None:
    """Querying a never-created job_id yields a 404."""
    missing_id = uuid4()

    resp = client.get(
        f"/api/v1/llm/call/{missing_id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 404
Loading