87 changes: 81 additions & 6 deletions llama-index-server/README.md
@@ -1,14 +1,21 @@
# LlamaIndex Server

LlamaIndexServer is a FastAPI application that allows you to quickly launch your workflow as an API server.
LlamaIndexServer is a FastAPI-based application that allows you to quickly launch your [LlamaIndex Workflows](https://docs.llamaindex.ai/en/stable/module_guides/workflow/#workflows) and [Agent Workflows](https://docs.llamaindex.ai/en/stable/understanding/agent/multi_agent/) as an API server with an optional chat UI. It provides a complete environment for running LlamaIndex workflows with both API endpoints and a user interface for interaction.

## Features

- Serving a workflow as a chatbot
- Built on FastAPI for high performance and easy API development
- Optional built-in chat UI
- Prebuilt development code

## Installation

```bash
pip install llama-index-server
```

## Usage
## Quick Start

```python
# main.py
@@ -32,24 +39,92 @@ def create_workflow() -> Workflow:
)


# Create an API server the workflow
# Create an API server for the workflow
app = LlamaIndexServer(
workflow_factory=create_workflow # Supports Workflow or AgentWorkflow
workflow_factory=create_workflow, # Supports Workflow or AgentWorkflow
env="dev", # Enable development mode
include_ui=True, # Include chat UI
starter_questions=["What can you do?", "How do I use this?"],
verbose=True
)
```

## Running the server
## Running the Server

- In the same directory as `main.py`, run the following command to start the server:

```bash
fastapi dev
```

- Making a request to the server
- Making a request to the server:

```bash
curl -X POST "http://localhost:8000/api/chat" -H "Content-Type: application/json" -d '{"messages": [{"role": "user", "content": "What is the weather in Tokyo?"}]}'
```

- See the API documentation at `http://localhost:8000/docs`
- Access the chat UI at `http://localhost:8000/` (make sure `env="dev"` or `include_ui=True` is set in the server configuration)

## Configuration Options

The LlamaIndexServer accepts the following configuration parameters:

- `workflow_factory`: A callable that creates a fresh workflow instance for each request; it may optionally accept the incoming request via a `chat_request` parameter (see the sketch after this list)
- `logger`: Optional logger instance (defaults to uvicorn logger)
- `use_default_routers`: Whether to include default routers (chat, static file serving)
- `env`: Environment setting ('dev' enables CORS and UI by default)
- `include_ui`: Whether to include the chat UI
- `starter_questions`: List of starter questions for the chat UI
- `verbose`: Enable verbose logging
- `api_prefix`: API route prefix (default: "/api")
- `server_url`: The deployment URL of the server (default: None)
- `ui_path`: Path for downloaded UI static files (default: ".ui")
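
A minimal sketch of a factory that uses the optional `chat_request` parameter (the `add` tool, system prompt, and LLM setup are illustrative assumptions, not part of this package):

```python
# main.py — a sketch; the default LLM from Settings is used (e.g. OpenAI via OPENAI_API_KEY).
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.workflow import Workflow
from llama_index.server import LlamaIndexServer
from llama_index.server.api.models import ChatRequest


def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b


def create_workflow(chat_request: ChatRequest) -> Workflow:
    # The chat router inspects the factory signature and passes the incoming
    # request when a `chat_request` parameter is declared.
    return AgentWorkflow.from_tools_or_functions(
        tools_or_functions=[add],
        system_prompt="You are a helpful assistant that can do simple math.",
    )


app = LlamaIndexServer(
    workflow_factory=create_workflow,  # Supports Workflow or AgentWorkflow
    env="dev",
    include_ui=True,
    starter_questions=["What can you do?", "How do I use this?"],
    verbose=True,
)
```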

## Default Routers and Features

### Chat Router

The server includes a default chat router at `/api/chat` for handling chat interactions.

### Static File Serving

- The server automatically mounts the `data` and `output` folders at `{server_url}{api_prefix}/files/data` (default: `/api/files/data`) and `{server_url}{api_prefix}/files/output` (default: `/api/files/output`) respectively.
- Your workflows can use both folders to store and access files. As a convention, the `data` folder is used for documents that are ingested and the `output` folder is used for documents generated by the workflow (a minimal sketch follows this list).
- The example workflows from `create-llama` (see below) follow this pattern.
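
A minimal sketch of the `output` convention, assuming the default `api_prefix` of `/api`; the helper name and `report.md` file name are illustrative:

```python
import os

OUTPUT_DIR = "output"  # mounted by the server at {api_prefix}/files/output


def save_report(content: str, file_name: str = "report.md") -> str:
    # Write a generated document into the mounted `output` folder so clients
    # can fetch it from the file server, e.g. /api/files/output/report.md.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    path = os.path.join(OUTPUT_DIR, file_name)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    return f"/api/files/output/{file_name}"
```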

### Chat UI

When enabled, the server provides a chat interface at the root path (`/`) with:

- Configurable starter questions
- Real-time chat interface
- API endpoint integration

## Development Mode

In development mode (`env="dev"`), the server:

- Enables CORS for all origins
- Automatically includes the chat UI
- Provides more verbose logging

## API Endpoints

The server provides the following default endpoints:

- `/api/chat`: Chat interaction endpoint
- `/api/files/data/*`: Access to data directory files
- `/api/files/output/*`: Access to output directory files (see the example request after this list)
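
For example, a file written by a workflow as sketched above could be retrieved like this (a sketch; `report.md` is a placeholder for a file that actually exists in `output`):

```python
import requests

BASE_URL = "http://localhost:8000"

# Download a generated document from the mounted `output` folder.
response = requests.get(f"{BASE_URL}/api/files/output/report.md")
response.raise_for_status()
with open("report.md", "wb") as f:
    f.write(response.content)
```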

## Best Practices

1. Always provide a workflow factory that creates fresh workflow instances
2. Use environment variables for sensitive configuration (see the sketch after this list)
3. Enable verbose logging during development
4. Configure CORS appropriately for your deployment environment
5. Use starter questions to guide users in the chat UI
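
As a small sketch of point 2, keep secrets and deployment-specific values in the environment (variable names other than `LLAMA_CLOUD_API_KEY` are illustrative):

```python
import os

# Keep secrets and deployment-specific settings out of the code base.
llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")  # only needed for LlamaCloud features
server_url = os.environ.get("SERVER_URL")  # pass as LlamaIndexServer(server_url=...)
```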

## Getting Started with a New Project

Want to start a new project with LlamaIndexServer? Check out our [create-llama](https://github.com/run-llama/create-llama) tool to quickly generate a new project with LlamaIndexServer.
llama-index-server/llama_index/server/api/callbacks/__init__.py
@@ -1,4 +1,5 @@
from llama_index.server.api.callbacks.base import EventCallback
from llama_index.server.api.callbacks.llamacloud import LlamaCloudFileDownload
from llama_index.server.api.callbacks.source_nodes import SourceNodesFromToolCall
from llama_index.server.api.callbacks.suggest_next_questions import (
SuggestNextQuestions,
@@ -8,4 +9,5 @@
"EventCallback",
"SourceNodesFromToolCall",
"SuggestNextQuestions",
"LlamaCloudFileDownload",
]
39 changes: 39 additions & 0 deletions llama-index-server/llama_index/server/api/callbacks/llamacloud.py
@@ -0,0 +1,39 @@
import logging
from typing import Any, List

from fastapi import BackgroundTasks
from llama_index.core.schema import NodeWithScore
from llama_index.server.api.callbacks.base import EventCallback
from llama_index.server.services.llamacloud.file import LlamaCloudFileService

logger = logging.getLogger("uvicorn")


class LlamaCloudFileDownload(EventCallback):
"""
Processor for handling LlamaCloud file downloads from source nodes.
"""

def __init__(self, background_tasks: BackgroundTasks) -> None:
self.background_tasks = background_tasks

async def run(self, event: Any) -> Any:
if hasattr(event, "to_response"):
event_response = event.to_response()
if event_response.get("type") == "sources" and hasattr(event, "nodes"):
await self._process_response_nodes(event.nodes)
return event

async def _process_response_nodes(self, source_nodes: List[NodeWithScore]) -> None:
try:
LlamaCloudFileService.download_files_from_nodes(
source_nodes, self.background_tasks
)
except ImportError:
pass

@classmethod
def from_default(
cls, background_tasks: BackgroundTasks
) -> "LlamaCloudFileDownload":
return cls(background_tasks=background_tasks)
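
For orientation, this is roughly how the new callback is wired up by the chat router later in this diff; a condensed sketch, with `BackgroundTasks` injected by FastAPI into the route handler (`build_callbacks` is just an illustrative helper name):

```python
from fastapi import BackgroundTasks
from llama_index.server.api.callbacks import (
    LlamaCloudFileDownload,
    SourceNodesFromToolCall,
)
from llama_index.server.api.callbacks.base import EventCallback


def build_callbacks(background_tasks: BackgroundTasks) -> list[EventCallback]:
    # Mirrors the chat router: extract source nodes from tool call results and
    # download referenced LlamaCloud files in the background while streaming.
    return [
        SourceNodesFromToolCall(),
        LlamaCloudFileDownload(background_tasks),
    ]
```
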
15 changes: 7 additions & 8 deletions llama-index-server/llama_index/server/api/models.py
@@ -3,11 +3,11 @@
from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field, field_validator

from llama_index.core.schema import NodeWithScore
from llama_index.core.types import ChatMessage, MessageRole
from llama_index.core.workflow import Event
from llama_index.server.settings import server_settings
from pydantic import BaseModel, Field, field_validator

logger = logging.getLogger("uvicorn")

@@ -29,6 +29,7 @@ def to_llamaindex_message(self) -> ChatMessage:

class ChatRequest(BaseModel):
messages: List[ChatAPIMessage]
data: Optional[Any] = None
config: Optional[ChatConfig] = ChatConfig()

@field_validator("messages")
@@ -98,13 +99,11 @@ def from_source_node(cls, source_node: NodeWithScore) -> "SourceNodes":

@classmethod
def get_url_from_metadata(
cls, metadata: Dict[str, Any], data_dir: Optional[str] = None
cls,
metadata: Dict[str, Any],
data_dir: Optional[str] = None,
) -> Optional[str]:
url_prefix = os.getenv("FILESERVER_URL_PREFIX")
if not url_prefix:
logger.warning(
"Warning: FILESERVER_URL_PREFIX not set in environment variables. Can't use file server"
)
url_prefix = server_settings.file_server_url_prefix
if data_dir is None:
data_dir = "data"
file_name = metadata.get("file_name")
42 changes: 38 additions & 4 deletions llama-index-server/llama_index/server/api/routers/chat.py
@@ -1,20 +1,23 @@
import asyncio
import inspect
import logging
import os
from typing import AsyncGenerator, Callable, Union

from fastapi import APIRouter, HTTPException
from fastapi import APIRouter, BackgroundTasks, HTTPException
from fastapi.responses import StreamingResponse

from llama_index.core.agent.workflow.workflow_events import AgentStream
from llama_index.core.workflow import StopEvent, Workflow
from llama_index.server.api.callbacks import (
SourceNodesFromToolCall,
SuggestNextQuestions,
)
from llama_index.server.api.callbacks.base import EventCallback
from llama_index.server.api.callbacks.llamacloud import LlamaCloudFileDownload
from llama_index.server.api.callbacks.stream_handler import StreamHandler
from llama_index.server.api.models import ChatRequest
from llama_index.server.api.utils.vercel_stream import VercelStreamResponse
from llama_index.server.services.llamacloud import LlamaCloudFileService


def chat_router(
@@ -24,20 +27,29 @@ def chat_router(
router = APIRouter(prefix="/chat")

@router.post("")
async def chat(request: ChatRequest) -> StreamingResponse:
async def chat(
request: ChatRequest,
background_tasks: BackgroundTasks,
) -> StreamingResponse:
try:
user_message = request.messages[-1].to_llamaindex_message()
chat_history = [
message.to_llamaindex_message() for message in request.messages[:-1]
]
workflow = workflow_factory()
# detect if the workflow factory has chat_request as a parameter
factory_sig = inspect.signature(workflow_factory)
if "chat_request" in factory_sig.parameters:
workflow = workflow_factory(chat_request=request)
else:
workflow = workflow_factory()
workflow_handler = workflow.run(
user_msg=user_message.content,
chat_history=chat_history,
)

callbacks: list[EventCallback] = [
SourceNodesFromToolCall(),
LlamaCloudFileDownload(background_tasks),
]
if request.config and request.config.next_question_suggestions:
callbacks.append(SuggestNextQuestions(request))
@@ -53,6 +65,28 @@ async def chat(request: ChatRequest) -> StreamingResponse:
logger.error(e)
raise HTTPException(status_code=500, detail=str(e))

if LlamaCloudFileService.is_configured():

@router.get("/config/llamacloud")
async def chat_llama_cloud_config() -> dict:
if not os.getenv("LLAMA_CLOUD_API_KEY"):
raise HTTPException(
status_code=500, detail="LlamaCloud API KEY is not configured"
)
projects = LlamaCloudFileService.get_all_projects_with_pipelines()
pipeline = os.getenv("LLAMA_CLOUD_INDEX_NAME")
project = os.getenv("LLAMA_CLOUD_PROJECT_NAME")
pipeline_config = None
if pipeline and project:
pipeline_config = {
"pipeline": pipeline,
"project": project,
}
return {
"projects": projects,
"pipeline": pipeline_config,
}

return router


2 changes: 1 addition & 1 deletion llama-index-server/llama_index/server/chat_ui.py
@@ -5,7 +5,7 @@

import requests

CHAT_UI_VERSION = "0.0.3"
CHAT_UI_VERSION = "0.0.5"


def download_chat_ui(