Skip to content
10 changes: 10 additions & 0 deletions backend/app/api/docs/llm/get_llm_call.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Retrieve the status and results of an LLM call job by job ID.

This endpoint allows you to poll for the status and results of an asynchronous LLM call job that was previously initiated via the POST `/llm/call` endpoint.


### Notes

- This endpoint returns the job status and, once the job has completed, the actual LLM response as well
- LLM responses are also delivered asynchronously via the callback URL, if one was provided
- A job can be queried at any time after it has been created
105 changes: 97 additions & 8 deletions backend/app/api/routes/llm.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
import logging
from uuid import UUID

from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, HTTPException

from app.api.deps import AuthContextDep, SessionDep
from app.api.permissions import Permission, require_permission
from app.models import LLMCallRequest, LLMCallResponse, Message
from app.crud.jobs import JobCrud
from app.crud.llm import get_llm_calls_by_job_id
from app.models import (
LLMCallRequest,
LLMCallResponse,
LLMJobImmediatePublic,
LLMJobPublic,
JobStatus,
)
from app.models.llm.response import LLMResponse, Usage
from app.services.llm.jobs import start_job
from app.utils import APIResponse, validate_callback_url, load_description

Expand Down Expand Up @@ -34,7 +44,7 @@ def llm_callback_notification(body: APIResponse[LLMCallResponse]):
@router.post(
"/llm/call",
description=load_description("llm/llm_call.md"),
response_model=APIResponse[Message],
response_model=APIResponse[LLMJobImmediatePublic],
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have we tried running this endpoint manually triggering with Postman or Swagger?

callbacks=llm_callback_router.routes,
dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
)
Expand All @@ -43,22 +53,101 @@ def llm_call(
):
"""
Endpoint to initiate an LLM call as a background job.
Returns job information for polling.
"""
project_id = _current_user.project_.id
organization_id = _current_user.organization_.id

if request.callback_url:
validate_callback_url(str(request.callback_url))

start_job(
job_id = start_job(
db=session,
request=request,
project_id=project_id,
organization_id=organization_id,
)

return APIResponse.success_response(
data=Message(
message=f"Your response is being generated and will be delivered via callback."
),
# Fetch job details to return immediate response
job_crud = JobCrud(session=session)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

jobcrud.create() should return the job_id no? I think .get can be removed

job = job_crud.get(job_id=job_id)

if not job:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like this line is not required if above is fixed

raise HTTPException(status_code=404, detail="Job not found")

if request.callback_url:
message = "Your response is being generated and will be delivered via callback."
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make the default global message="Your response is being generated and will be delivered via callback." and replace for negative case

else:
message = "Your response is being generated"

job_response = LLMJobImmediatePublic(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the wrapper model is not required

job_id=job.id,
status=job.status.value,
message=message,
job_inserted_at=job.created_at,
job_updated_at=job.updated_at,
)

return APIResponse.success_response(data=job_response)


@router.get(
    "/llm/call/{job_id}",
    description=load_description("llm/get_llm_call.md"),
    response_model=APIResponse[LLMJobPublic],
    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
)
def get_llm_call_status(
    _current_user: AuthContextDep,
    session: SessionDep,
    job_id: UUID,
) -> APIResponse[LLMJobPublic]:
    """
    Poll for LLM call job status and results.

    Returns the job's current status, and — once the job has completed
    successfully — the nested LLM response with usage data.

    Raises:
        HTTPException: 404 if no job exists for ``job_id``.
    """
    job_crud = JobCrud(session=session)
    job = job_crud.get(job_id=job_id)

    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    llm_call_response = None
    # Compare the enum directly; comparing job.status.value (a string) to the
    # enum member is fragile and only works for str-based enums.
    if job.status == JobStatus.SUCCESS:
        llm_calls = get_llm_calls_by_job_id(
            session=session, job_id=job_id, project_id=_current_user.project_.id
        )

        if llm_calls:
            # The first LLM call is the only call for this job id, since this
            # endpoint is not (yet) used for LLM chains.
            llm_call = llm_calls[0]

            if llm_call.usage:
                llm_response = LLMResponse(
                    provider_response_id=llm_call.provider_response_id or "",
                    conversation_id=llm_call.conversation_id,
                    provider=llm_call.provider,
                    model=llm_call.model,
                    output=llm_call.content,
                )
                llm_call_response = LLMCallResponse(
                    response=llm_response,
                    usage=Usage(**llm_call.usage),
                    provider_raw_response=None,
                )
            else:
                # Missing usage data is tolerable: log a warning and return the
                # job status without the nested response rather than failing
                # the whole poll with a 500.
                logging.getLogger(__name__).warning(
                    "Completed LLM job %s is missing usage data", job_id
                )

    job_response = LLMJobPublic(
        job_id=job.id,
        status=job.status.value,
        llm_response=llm_call_response,
        error_message=job.error_message,
    )

    return APIResponse.success_response(data=job_response)
11 changes: 6 additions & 5 deletions backend/app/crud/llm.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
import base64
import json
from uuid import UUID
from typing import Any, Literal

from uuid import UUID
from sqlmodel import Session, select

from app.core.util import now
import base64
import json
from app.models.llm import LlmCall, LLMCallRequest, ConfigBlob
from app.models.llm.request import (
TextInput,
Expand Down Expand Up @@ -234,13 +235,13 @@ def get_llm_call_by_id(


def get_llm_calls_by_job_id(
session: Session,
job_id: UUID,
session: Session, job_id: UUID, project_id: int
) -> list[LlmCall]:
statement = (
select(LlmCall)
.where(
LlmCall.job_id == job_id,
LlmCall.project_id == project_id,
LlmCall.deleted_at.is_(None),
)
.order_by(LlmCall.created_at.desc())
Expand Down
3 changes: 3 additions & 0 deletions backend/app/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@
LLMChainRequest,
LLMChainResponse,
LlmChain,
LLMJobBasePublic,
LLMJobImmediatePublic,
LLMJobPublic,
)

from .message import Message
Expand Down
3 changes: 3 additions & 0 deletions backend/app/models/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,7 @@
AudioOutput,
LLMChainResponse,
IntermediateChainResponse,
LLMJobBasePublic,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these models are not required. There are total 7 fields russian doll-ed amongst the three. Use the strings instead

LLMJobImmediatePublic,
LLMJobPublic,
)
1 change: 1 addition & 0 deletions backend/app/models/llm/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pydantic import HttpUrl, model_validator
from sqlalchemy.dialects.postgresql import JSONB
from sqlmodel import Field, Index, SQLModel, text

from app.core.util import now
from app.models.llm.constants import (
DEFAULT_STT_MODEL,
Expand Down
29 changes: 28 additions & 1 deletion backend/app/models/llm/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@

This module contains structured response models for LLM API calls.
"""
from sqlmodel import SQLModel, Field
from datetime import datetime
from uuid import UUID
from typing import Literal, Annotated

from sqlmodel import SQLModel, Field

from app.models.llm.request import AudioContent, TextContent


Expand Down Expand Up @@ -100,3 +104,26 @@ class IntermediateChainResponse(SQLModel):
default=None,
description="Unmodified raw response from the LLM provider from the current block",
)


# Job response models returned by the /llm/call endpoints.
class LLMJobBasePublic(SQLModel):
    """Common fields shared by every LLM job API response."""

    # Primary key of the background job row.
    job_id: UUID
    status: str  # string value of JobStatus from job.py, e.g. "PENDING" / "SUCCESS" / "FAILED"


class LLMJobImmediatePublic(LLMJobBasePublic):
    """Immediate response returned right after an LLM job is created."""

    # Human-readable note telling the caller how the result will be delivered.
    message: str
    # Mirror the job row's created_at / updated_at timestamps.
    job_inserted_at: datetime
    job_updated_at: datetime


class LLMJobPublic(LLMJobBasePublic):
    """Full job response with the nested LLM response when complete."""

    # Nested LLM result; None while the job is pending or after a failure.
    llm_response: LLMCallResponse | None = None
    # Set when the job failed (e.g. a provider timeout); None otherwise.
    error_message: str | None = None
116 changes: 113 additions & 3 deletions backend/app/tests/api/routes/test_llm.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,40 @@
import pytest
from uuid import uuid4
from unittest.mock import patch

from sqlmodel import Session
from fastapi.testclient import TestClient

from app.models import LLMCallRequest
from app.crud import JobCrud
from app.models import Job, JobStatus, JobUpdate
from app.models.llm.response import LLMCallResponse
from app.models.llm.request import (
QueryParams,
LLMCallConfig,
ConfigBlob,
KaapiCompletionConfig,
NativeCompletionConfig,
KaapiCompletionConfig,
QueryParams,
)
from app.models.llm import LLMCallRequest
from app.tests.utils.auth import TestAuthContext
from app.tests.utils.llm import create_llm_job, create_llm_call_with_response


@pytest.fixture
def llm_job(db: Session) -> Job:
    """Create and return a fresh LLM job row with no LLM call attached yet."""
    return create_llm_job(db)


@pytest.fixture
def llm_response_in_db(
    db: Session, llm_job: Job, user_api_key: TestAuthContext
) -> LLMCallResponse:
    """Persist an LLM call with a response payload, linked to ``llm_job``.

    Scoped to the API key's project/organization so the route's
    project-filtered lookup can find it.
    """
    return create_llm_call_with_response(
        db,
        job_id=llm_job.id,
        project_id=user_api_key.project_id,
        organization_id=user_api_key.organization_id,
    )


def test_llm_call_success(
Expand Down Expand Up @@ -247,3 +272,88 @@ def test_llm_call_guardrails_bypassed_still_succeeds(
assert "response is being generated" in body["data"]["message"]

mock_start_job.assert_called_once()


def test_get_llm_call_pending(
    client: TestClient,
    user_api_key_header: dict[str, str],
    llm_job,
) -> None:
    """A freshly created (PENDING) job is returned with no llm_response."""
    resp = client.get(
        f"/api/v1/llm/call/{llm_job.id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["success"] is True

    data = payload["data"]
    assert data["job_id"] == str(llm_job.id)
    assert data["status"] == "PENDING"
    assert data["llm_response"] is None


def test_get_llm_call_success(
    client: TestClient,
    db: Session,
    user_api_key_header: dict[str, str],
    llm_response_in_db: LLMCallResponse,
) -> None:
    """A job in SUCCESS state carries the full llm_response, usage included."""
    JobCrud(db).update(llm_response_in_db.job_id, JobUpdate(status=JobStatus.SUCCESS))

    resp = client.get(
        f"/api/v1/llm/call/{llm_response_in_db.job_id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["success"] is True

    data = payload["data"]
    assert data["status"] == "SUCCESS"

    llm_response = data["llm_response"]
    assert llm_response is not None
    assert llm_response["response"]["provider_response_id"] == "resp_abc123"
    assert llm_response["response"]["provider"] == "openai"

    usage = llm_response["usage"]
    assert usage["input_tokens"] == 10
    assert usage["output_tokens"] == 5
    assert usage["total_tokens"] == 15


def test_get_llm_call_failed(
    client: TestClient,
    db: Session,
    user_api_key_header: dict[str, str],
    llm_job,
) -> None:
    """A FAILED job exposes its error_message and no llm_response."""
    JobCrud(db).update(
        llm_job.id,
        JobUpdate(status=JobStatus.FAILED, error_message="Provider timeout"),
    )

    resp = client.get(
        f"/api/v1/llm/call/{llm_job.id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["success"] is True

    data = payload["data"]
    assert data["status"] == "FAILED"
    assert data["error_message"] == "Provider timeout"
    assert data["llm_response"] is None


def test_get_llm_call_not_found(
    client: TestClient,
    user_api_key_header: dict[str, str],
) -> None:
    """Querying a job_id that was never created yields a 404."""
    missing_id = uuid4()

    resp = client.get(
        f"/api/v1/llm/call/{missing_id}",
        headers=user_api_key_header,
    )

    assert resp.status_code == 404
Loading
Loading