Skip to content
10 changes: 10 additions & 0 deletions backend/app/api/docs/llm/get_llm_call.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Retrieve the status and results of an LLM call job by job ID.

This endpoint allows you to poll for the status and results of an asynchronous LLM call job that was previously initiated via the POST `/llm/call` endpoint.


### Notes

- This endpoint returns the job status and, once the job has completed, the actual LLM response as well
- LLM responses are also delivered asynchronously via the callback URL, if one was provided
- A job can be queried at any time after it has been created
105 changes: 97 additions & 8 deletions backend/app/api/routes/llm.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
import logging
from uuid import UUID

from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, HTTPException

from app.api.deps import AuthContextDep, SessionDep
from app.api.permissions import Permission, require_permission
from app.models import LLMCallRequest, LLMCallResponse, Message
from app.crud.jobs import JobCrud
from app.crud.llm import get_llm_calls_by_job_id
from app.models import (
LLMCallRequest,
LLMCallResponse,
LLMJobImmediatePublic,
LLMJobPublic,
JobStatus,
)
from app.models.llm.response import LLMResponse, Usage
from app.services.llm.jobs import start_job
from app.utils import APIResponse, validate_callback_url, load_description

Expand Down Expand Up @@ -34,7 +44,7 @@ def llm_callback_notification(body: APIResponse[LLMCallResponse]):
@router.post(
"/llm/call",
description=load_description("llm/llm_call.md"),
response_model=APIResponse[Message],
response_model=APIResponse[LLMJobImmediatePublic],
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have we tried running this endpoint manually triggering with Postman or Swagger?

callbacks=llm_callback_router.routes,
dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
)
Expand All @@ -43,22 +53,101 @@ def llm_call(
):
"""
Endpoint to initiate an LLM call as a background job.
Returns job information for polling.
"""
project_id = _current_user.project_.id
organization_id = _current_user.organization_.id

if request.callback_url:
validate_callback_url(str(request.callback_url))

start_job(
job_id = start_job(
db=session,
request=request,
project_id=project_id,
organization_id=organization_id,
)

return APIResponse.success_response(
data=Message(
message=f"Your response is being generated and will be delivered via callback."
),
# Fetch job details to return immediate response
job_crud = JobCrud(session=session)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

jobcrud.create() should return the job_id no? I think .get can be removed

job = job_crud.get(job_id=job_id)

if not job:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like this line is not required if above is fixed

raise HTTPException(status_code=404, detail="Job not found")

if request.callback_url:
message = "Your response is being generated and will be delivered via callback."
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make the default global message="Your response is being generated and will be delivered via callback." and replace for negative case

else:
message = "Your response is being generated"

job_response = LLMJobImmediatePublic(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the wrapper model is not required

job_id=job.id,
status=job.status.value,
message=message,
job_inserted_at=job.created_at,
job_updated_at=job.updated_at,
)

return APIResponse.success_response(data=job_response)


@router.get(
    "/llm/call/{job_id}",
    description=load_description("llm/get_llm_call.md"),
    response_model=APIResponse[LLMJobPublic],
    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
)
def get_llm_call_status(
    _current_user: AuthContextDep,
    session: SessionDep,
    job_id: UUID,
) -> APIResponse[LLMJobPublic]:
    """
    Poll for LLM call job status and results.

    Returns the job's current status, and — once the job has completed
    successfully — the nested LLM response with usage data.

    Raises:
        HTTPException: 404 if no job exists for ``job_id``.
    """
    job_crud = JobCrud(session=session)
    job = job_crud.get(job_id=job_id)

    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    llm_call_response = None
    # Compare the enum directly; comparing job.status.value (a string) to the
    # enum member is fragile and only works for str-based enums.
    if job.status == JobStatus.SUCCESS:
        llm_calls = get_llm_calls_by_job_id(
            session=session, job_id=job_id, project_id=_current_user.project_.id
        )

        if llm_calls:
            # The first LLM call is the only call for this job id, since this
            # endpoint is not (yet) used for LLM chains.
            llm_call = llm_calls[0]

            if llm_call.usage:
                llm_response = LLMResponse(
                    provider_response_id=llm_call.provider_response_id or "",
                    conversation_id=llm_call.conversation_id,
                    provider=llm_call.provider,
                    model=llm_call.model,
                    output=llm_call.content,
                )
                llm_call_response = LLMCallResponse(
                    response=llm_response,
                    usage=Usage(**llm_call.usage),
                    provider_raw_response=None,
                )
            else:
                # Missing usage data is tolerable: log a warning and return the
                # job status without the nested response rather than failing
                # the whole poll with a 500.
                logging.getLogger(__name__).warning(
                    "Completed LLM job %s is missing usage data", job_id
                )

    job_response = LLMJobPublic(
        job_id=job.id,
        status=job.status.value,
        llm_response=llm_call_response,
        error_message=job.error_message,
    )

    return APIResponse.success_response(data=job_response)
11 changes: 6 additions & 5 deletions backend/app/crud/llm.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
import base64
import json
from uuid import UUID
from typing import Any, Literal

from uuid import UUID
from sqlmodel import Session, select

from app.core.util import now
import base64
import json
from app.models.llm import LlmCall, LLMCallRequest, ConfigBlob
from app.models.llm.request import (
TextInput,
Expand Down Expand Up @@ -234,13 +235,13 @@ def get_llm_call_by_id(


def get_llm_calls_by_job_id(
session: Session,
job_id: UUID,
session: Session, job_id: UUID, project_id: int
) -> list[LlmCall]:
statement = (
select(LlmCall)
.where(
LlmCall.job_id == job_id,
LlmCall.project_id == project_id,
LlmCall.deleted_at.is_(None),
)
.order_by(LlmCall.created_at.desc())
Expand Down
3 changes: 3 additions & 0 deletions backend/app/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@
LLMChainRequest,
LLMChainResponse,
LlmChain,
LLMJobBasePublic,
LLMJobImmediatePublic,
LLMJobPublic,
)

from .message import Message
Expand Down
3 changes: 3 additions & 0 deletions backend/app/models/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,7 @@
AudioOutput,
LLMChainResponse,
IntermediateChainResponse,
LLMJobBasePublic,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these models are not required. There are total 7 fields russian doll-ed amongst the three. Use the strings instead

LLMJobImmediatePublic,
LLMJobPublic,
)
1 change: 1 addition & 0 deletions backend/app/models/llm/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pydantic import HttpUrl, model_validator
from sqlalchemy.dialects.postgresql import JSONB
from sqlmodel import Field, Index, SQLModel, text

from app.core.util import now
from app.models.llm.constants import (
DEFAULT_STT_MODEL,
Expand Down
29 changes: 28 additions & 1 deletion backend/app/models/llm/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@

This module contains structured response models for LLM API calls.
"""
from sqlmodel import SQLModel, Field
from datetime import datetime
from uuid import UUID
from typing import Literal, Annotated

from sqlmodel import SQLModel, Field

from app.models.llm.request import AudioContent, TextContent


Expand Down Expand Up @@ -100,3 +104,26 @@ class IntermediateChainResponse(SQLModel):
default=None,
description="Unmodified raw response from the LLM provider from the current block",
)


# Job response models returned by the /llm/call endpoints.
class LLMJobBasePublic(SQLModel):
    """Common fields shared by every LLM job API response."""

    # Primary key of the background job row.
    job_id: UUID
    status: str  # string value of JobStatus from job.py, e.g. "PENDING" / "SUCCESS" / "FAILED"


class LLMJobImmediatePublic(LLMJobBasePublic):
    """Immediate response returned right after an LLM job is created."""

    # Human-readable note telling the caller how the result will be delivered.
    message: str
    # Mirror the job row's created_at / updated_at timestamps.
    job_inserted_at: datetime
    job_updated_at: datetime


class LLMJobPublic(LLMJobBasePublic):
    """Full job response with the nested LLM response when complete."""

    # Nested LLM result; None while the job is pending or after a failure.
    llm_response: LLMCallResponse | None = None
    # Set when the job failed (e.g. a provider timeout); None otherwise.
    error_message: str | None = None
116 changes: 113 additions & 3 deletions backend/app/tests/api/routes/test_llm.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,40 @@
import pytest
from uuid import uuid4
from unittest.mock import patch

from sqlmodel import Session
from fastapi.testclient import TestClient

from app.models import LLMCallRequest
from app.crud import JobCrud
from app.models import Job, JobStatus, JobUpdate
from app.models.llm.response import LLMCallResponse
from app.models.llm.request import (
QueryParams,
LLMCallConfig,
ConfigBlob,
KaapiCompletionConfig,
NativeCompletionConfig,
KaapiCompletionConfig,
QueryParams,
)
from app.models.llm import LLMCallRequest
from app.tests.utils.auth import TestAuthContext
from app.tests.utils.llm import create_llm_job, create_llm_call_with_response


@pytest.fixture
def llm_job(db: Session) -> Job:
    """Create and return a fresh LLM job row with no LLM call attached yet."""
    return create_llm_job(db)


@pytest.fixture
def llm_response_in_db(
    db: Session, llm_job: Job, user_api_key: TestAuthContext
) -> LLMCallResponse:
    """Persist an LLM call with a response payload, linked to ``llm_job``.

    Scoped to the API key's project/organization so the route's
    project-filtered lookup can find it.
    """
    return create_llm_call_with_response(
        db,
        job_id=llm_job.id,
        project_id=user_api_key.project_id,
        organization_id=user_api_key.organization_id,
    )


def test_llm_call_success(
Expand Down Expand Up @@ -247,3 +272,88 @@ def test_llm_call_guardrails_bypassed_still_succeeds(
assert "response is being generated" in body["data"]["message"]

mock_start_job.assert_called_once()


def test_get_llm_call_pending(
    client: TestClient,
    user_api_key_header: dict[str, str],
    llm_job,
) -> None:
    """A freshly created (PENDING) job is returned with no llm_response."""
    resp = client.get(
        f"/api/v1/llm/call/{llm_job.id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["success"] is True

    data = payload["data"]
    assert data["job_id"] == str(llm_job.id)
    assert data["status"] == "PENDING"
    assert data["llm_response"] is None


def test_get_llm_call_success(
    client: TestClient,
    db: Session,
    user_api_key_header: dict[str, str],
    llm_response_in_db: LLMCallResponse,
) -> None:
    """A job in SUCCESS state carries the full llm_response, usage included."""
    JobCrud(db).update(llm_response_in_db.job_id, JobUpdate(status=JobStatus.SUCCESS))

    resp = client.get(
        f"/api/v1/llm/call/{llm_response_in_db.job_id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["success"] is True

    data = payload["data"]
    assert data["status"] == "SUCCESS"

    llm_response = data["llm_response"]
    assert llm_response is not None
    assert llm_response["response"]["provider_response_id"] == "resp_abc123"
    assert llm_response["response"]["provider"] == "openai"

    usage = llm_response["usage"]
    assert usage["input_tokens"] == 10
    assert usage["output_tokens"] == 5
    assert usage["total_tokens"] == 15


def test_get_llm_call_failed(
    client: TestClient,
    db: Session,
    user_api_key_header: dict[str, str],
    llm_job,
) -> None:
    """A FAILED job exposes its error_message and no llm_response."""
    JobCrud(db).update(
        llm_job.id,
        JobUpdate(status=JobStatus.FAILED, error_message="Provider timeout"),
    )

    resp = client.get(
        f"/api/v1/llm/call/{llm_job.id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["success"] is True

    data = payload["data"]
    assert data["status"] == "FAILED"
    assert data["error_message"] == "Provider timeout"
    assert data["llm_response"] is None


def test_get_llm_call_not_found(
    client: TestClient,
    user_api_key_header: dict[str, str],
) -> None:
    """Querying a job_id that was never created yields a 404."""
    missing_id = uuid4()

    resp = client.get(
        f"/api/v1/llm/call/{missing_id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 404
Loading
Loading