Skip to content

Commit df4c296

Browse files
author
Pablo Marin
committed
improve SSE /stream endpoint and clients
1 parent 6c30fed commit df4c296

File tree

3 files changed

+109
-57
lines changed

3 files changed

+109
-57
lines changed

15-FastAPI-API.ipynb

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -224,18 +224,35 @@
224224
" resp.raise_for_status()\n",
225225
" client = SSEClient(resp)\n",
226226
" for event in client.events():\n",
227-
" if event.event == 'tool_start':\n",
228-
" print(\"\\n[Tool Start]\", event.data)\n",
229-
" elif event.event == 'tool_end':\n",
230-
" print(\"\\n[Tool End]\", event.data)\n",
231-
" elif event.event == 'partial':\n",
232-
" process_partial_text(event.data)\n",
233-
" elif event.event == 'end':\n",
234-
" print(f\"\\n[Done Streaming] Final text: {event.data}\")\n",
235-
" elif event.event == 'error':\n",
236-
" print(f\"\\n[SSE Error] {event.data}\")\n",
227+
" evt_type = event.event\n",
228+
" evt_data = event.data\n",
229+
"\n",
230+
" if evt_type == 'metadata':\n",
231+
" info = json.loads(evt_data)\n",
232+
" print(f\"\\n[Metadata] run_id={info.get('run_id', '')}\")\n",
233+
"\n",
234+
" elif evt_type == 'data':\n",
235+
" # The server is sending partial chunk(s) as a \"data\" event\n",
236+
" # so we treat it as partial text to display:\n",
237+
" process_partial_text(evt_data)\n",
238+
"\n",
239+
" elif evt_type == 'on_tool_start':\n",
240+
" print(f\"\\n[Tool Start] {evt_data}\")\n",
241+
"\n",
242+
" elif evt_type == 'on_tool_end':\n",
243+
" print(f\"\\n[Tool End] {evt_data}\")\n",
244+
"\n",
245+
" elif evt_type == 'end':\n",
246+
" # This signals no more data will follow.\n",
247+
" print(f\"\\n[Done Streaming] Final text: {evt_data}\")\n",
248+
"\n",
249+
" elif evt_type == 'error':\n",
250+
" # The server encountered an exception or error\n",
251+
" print(f\"\\n[SSE Error] {evt_data}\")\n",
252+
"\n",
237253
" else:\n",
238-
" print(f\"\\n[Unrecognized event: {event.event}]\", event.data)\n",
254+
" # Some unexpected event name\n",
255+
" print(f\"\\n[Unrecognized event={evt_type}] {evt_data}\")\n",
239256
"\n"
240257
]
241258
},

apps/backend/fastapi/app/server.py

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@
1919
csv_file_path = "data/all-states-history.csv"
2020
api_file_path = "data/openapi_kraken.json"
2121

22-
##########################################################
23-
## Uncomment this section to run locally
22+
######### Uncomment this section to run locally ##########
2423
# current_file = Path(__file__).resolve()
2524
# library_path = current_file.parents[4]
2625
# data_path = library_path / "data"
@@ -181,39 +180,75 @@ async def batch(req: BatchRequest):
181180
# Streaming Endpoint
182181
# -----------------------------------------------------------------------------
183182
@app.post("/stream")
184-
async def stream(req: AskRequest):
183+
async def stream_endpoint(req: AskRequest):
184+
"""
185+
Stream partial chunks from the chain in SSE format.
186+
187+
SSE event structure:
188+
- event: "metadata" (OPTIONAL) – any run-specific metadata
189+
- event: "data" – a chunk of text
190+
- event: "end" – signals no more data
191+
    - event: "on_tool_start" - signals the beginning of tool use
192+
    - event: "on_tool_end" - signals the end of tool use
193+
- event: "error" – signals an error
194+
"""
185195
logger.info("[/stream] Called with user_input=%s, thread_id=%s", req.user_input, req.thread_id)
186196

187197
if not graph_async:
188198
logger.error("Graph not compiled yet.")
189199
raise HTTPException(status_code=500, detail="Graph not compiled yet.")
190200

191-
config = {"configurable": {"thread_id": req.thread_id or str(uuid.uuid4())}}
201+
run_id = req.thread_id or str(uuid.uuid4())
202+
config = {"configurable": {"thread_id": run_id}}
192203
inputs = {"messages": [("human", req.user_input)]}
193204

194205
async def event_generator():
195-
accumulated_text = ""
196206
try:
207+
yield {
208+
"event": "metadata",
209+
"data": json.dumps({"run_id": run_id})
210+
}
211+
212+
accumulated_text = ""
197213
async for event in graph_async.astream_events(inputs, config, version="v2"):
198-
if event["event"] == "on_chat_model_stream" and event["metadata"].get("langgraph_node") == "agent":
199-
chunk_text = event["data"]["chunk"].content
200-
accumulated_text += chunk_text
201-
yield {"event": "partial", "data": chunk_text}
214+
if event["event"] == "on_chat_model_stream":
215+
if event["metadata"].get("langgraph_node") == "agent":
216+
chunk_text = event["data"]["chunk"].content
217+
accumulated_text += chunk_text
218+
219+
yield {
220+
"event": "data",
221+
"data": chunk_text # partial chunk
222+
}
223+
202224
elif event["event"] == "on_tool_start":
203-
yield {"event": "tool_start", "data": f"Starting {event.get('name','')}"}
225+
yield {"event": "on_tool_start", "data": f"Tool Start: {event.get('name', '')}"}
226+
204227
elif event["event"] == "on_tool_end":
205-
yield {"event": "tool_end", "data": f"Done {event.get('name','')}"}
228+
yield {"event": "on_tool_end", "data": f"Tool End: {event.get('name', '')}"}
229+
206230
elif event["event"] == "on_chain_end" and event.get("name") == "LangGraph":
231+
# If "FINISH" is the next step
207232
if event["data"]["output"].get("next") == "FINISH":
208233
yield {"event": "end", "data": accumulated_text}
209-
return
234+
return # Stop iteration
235+
210236
except Exception as ex:
211237
logger.exception("[/stream] Error streaming events")
212-
yield {"event": "error", "data": str(ex)}
238+
# SSE "error" event
239+
yield {
240+
"event": "error",
241+
"data": json.dumps({
242+
"status_code": 500,
243+
"message": str(ex)
244+
})
245+
}
246+
raise
213247

214248
return EventSourceResponse(event_generator(), media_type="text/event-stream")
215249

216250

251+
217252
# -----------------------------------------------------------------------------
218253
# Main Entrypoint
219254
# -----------------------------------------------------------------------------

apps/frontend/app/helpers/streamlit_helpers.py

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -71,28 +71,21 @@ def get_or_create_ids():
7171

7272
def consume_api(url, user_query, session_id, user_id):
7373
"""
74-
Send a POST request to the FastAPI backend at `url` (the SSE /stream endpoint),
74+
Send a POST request to the FastAPI backend at `url` (/stream endpoint),
7575
and consume the SSE stream using sseclient-py.
7676
7777
The server is expected to return events like:
78-
{"event": "partial", "data": "..."}
78+
{"event": "metadata", "data": "..."}
79+
{"event": "data", "data": "..."}
80+
{"event": "on_tool_start", "data": "..."}
81+
{"event": "on_tool_end", "data": "..."}
7982
{"event": "end", "data": "..."}
80-
{"event": "tool_start", "data": "..."}
81-
{"event": "tool_end", "data": "..."}
82-
{"event": "error", "data": "some error text"}
83-
84-
This function yields text chunks (e.g., partial content) as they arrive.
85-
86-
:param url: SSE /stream endpoint
87-
:param user_query: The user query text to send
88-
:param session_id: A unique ID representing the conversation
89-
:param user_id: The user ID (not strictly used in this minimal example)
90-
:yield: Text chunks that Streamlit can display incrementally
83+
{"event": "error", "data": "..."}
9184
"""
9285
headers = {"Content-Type": "application/json"}
9386
payload = {
9487
"user_input": user_query,
95-
"thread_id": session_id # Typically reusing session_id as conversation ID
88+
"thread_id": session_id
9689
}
9790

9891
logger.info(
@@ -104,43 +97,50 @@ def consume_api(url, user_query, session_id, user_id):
10497
try:
10598
with requests.post(url, json=payload, headers=headers, stream=True) as resp:
10699
resp.raise_for_status()
107-
logger.info("SSE stream opened successfully with status code %d.", resp.status_code)
100+
logger.info("SSE stream opened with status code: %d", resp.status_code)
108101

109-
# Use SSEClient to parse the stream
110102
client = SSEClient(resp)
111103
for event in client.events():
112-
if not event.data.strip():
113-
# Skip keep-alive messages or empty lines
104+
if not event.data:
105+
# Skip empty lines
114106
continue
115107

116108
evt_type = event.event
117109
evt_data = event.data
118110
logger.debug("Received SSE event: %s, data: %s", evt_type, evt_data)
119111

120-
# Switch on event type from the server
121-
if evt_type == "partial":
122-
# Yield partial text; can be streamed in real-time
112+
if evt_type == "metadata":
113+
# Possibly parse run_id from the JSON
114+
# e.g. { "run_id": "...some uuid..." }
115+
info = json.loads(evt_data)
116+
run_id = info.get("run_id", "")
117+
# For streamlit, you might store it as session state, etc.
118+
# st.write(f"New run_id: {run_id}")
119+
120+
elif evt_type == "data":
121+
# The server is sending partial tokens as "data"
122+
# We can yield them so Streamlit can display incrementally
123123
yield evt_data
124-
125-
elif evt_type == "tool_start":
126-
# Display tool start
127-
# e.g. [Tool Start] Starting documents_retrieval
128-
# yield f"\n[Tool Start] {evt_data}\n"
124+
125+
elif evt_type == "on_tool_start":
126+
# Optionally display: yield or do a Streamlit update
127+
# yield f"[Tool Start] {evt_data}"
129128
pass
130129

131-
elif evt_type == "tool_end":
132-
# Display tool end
133-
# e.g. [Tool End] Done documents_retrieval
134-
# yield f"\n[Tool End] {evt_data}\n"
130+
elif evt_type == "on_tool_end":
131+
# yield f"[Tool End] {evt_data}"
135132
pass
136-
133+
137134
elif evt_type == "end":
138-
# Yield final accumulated text without leading newline
135+
# This is the final text.
136+
# Typically you might do a final display or update the UI
139137
yield evt_data
138+
140139
elif evt_type == "error":
140+
# The server had an error
141141
yield f"[SSE Error] {evt_data}"
142+
142143
else:
143-
# Unrecognized event
144144
yield f"[Unrecognized event: {evt_type}] {evt_data}"
145145

146146
except requests.exceptions.HTTPError as err:

0 commit comments

Comments
 (0)