68 changes: 68 additions & 0 deletions inference-platforms/agent.py
@@ -0,0 +1,68 @@
# run like this: uv run --exact -q --env-file .env agent.py
# /// script
# dependencies = [
# "openai-agents",
# "httpx",
# "mcp",
# "elastic-opentelemetry",
# "openinference-instrumentation-openai-agents",
# "opentelemetry-instrumentation-httpx",
# "openinference-instrumentation-mcp",
# ]
# ///
# ruff: noqa: E402
from opentelemetry.instrumentation import auto_instrumentation

# This must precede any other imports you want to instrument!
auto_instrumentation.initialize()

import asyncio
import os
from datetime import datetime, timedelta

from agents import (
    Agent,
    OpenAIProvider,
    RunConfig,
    Runner,
    Tool,
)
from agents.mcp import MCPServerStreamableHttp, MCPUtil


async def run_agent(tools: list[Tool]):
    model_name = os.getenv("AGENT_MODEL", "gpt-5-nano")
    model = OpenAIProvider(use_responses=False).get_model(model_name)
    agent = Agent(
        name="flight-search-agent",
        model=model,
        tools=tools,
    )

    next_week = (datetime.now() + timedelta(weeks=1)).strftime("%Y-%m-%d")
    result = await Runner.run(
        starting_agent=agent,
        input=f"Give me the best flight from New York to Kota Kinabalu on {next_week}",
        run_config=RunConfig(workflow_name="flight search"),
    )
    print(result.final_output)


async def main():
    mcp_url = os.getenv("MCP_URL", "https://mcp.kiwi.com")
    async with MCPServerStreamableHttp(
        {
            "url": mcp_url,
            "timeout": 30.0,
        },
        cache_tools_list=True,
Collaborator: Just a yolo comment since I didn't check it at all, but it wouldn't surprise me if this enables the background polling you saw

Contributor Author: ah will try!

        client_session_timeout_seconds=60.0,
    ) as server:
        tools = await server.list_tools()
        util = MCPUtil()
        tools = [util.to_function_tool(tool, server, False) for tool in tools]
        await run_agent(tools)


if __name__ == "__main__":
    asyncio.run(main())
31 changes: 26 additions & 5 deletions inference-platforms/aigw/README.md
@@ -1,10 +1,23 @@
# Envoy AI Gateway

This shows how to use [Envoy AI Gateway][docs] to proxy Ollama, accessible via an
OpenAI compatible API.

Envoy AI Gateway is automatically configured by OpenAI and OpenTelemetry
environment variables read by `aigw run`, such as `OPENAI_API_KEY`.
This shows how to use [Envoy AI Gateway][docs] to proxy LLM and MCP servers,
specifically Ollama and Kiwi flight search.

Envoy AI Gateway exposes OpenAI- and MCP-compatible endpoints with configurable
backends. It is automatically configured by OpenAI and OpenTelemetry
environment variables read by `aigw run`, such as `OPENAI_API_KEY`. For MCP,
backends are declared in the canonical JSON format, like this:

```json
{
  "mcpServers": {
    "kiwi": {
      "type": "http",
      "url": "https://mcp.kiwi.com"
    }
  }
}
```

`aigw run` launches an Envoy proxy to handle requests. OpenTelemetry support
for GenAI metrics and traces is handled directly in the `aigw` (go) binary.
@@ -33,10 +46,18 @@ docker compose down
Once Envoy AI Gateway is running, use [uv][uv] to exercise it with
[chat.py](../chat.py) or [agent.py](../agent.py):

### Chat Completion

```bash
OPENAI_BASE_URL=http://localhost:1975/v1 uv run --exact -q --env-file env.local ../chat.py
```

### MCP Agent

```bash
OPENAI_BASE_URL=http://localhost:1975/v1 MCP_URL=http://localhost:1975/mcp uv run --exact -q --env-file env.local ../agent.py
```
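
To check what the gateway exposes at `/mcp` without running the full agent,
here is a minimal sketch (not part of this change) using the `mcp` Python SDK.
It assumes the gateway from the compose setup above is listening on
`localhost:1975` and that the Kiwi backend is reachable:

```python
# Sketch: list the tools proxied by the gateway's /mcp endpoint
# (assumes http://localhost:1975/mcp from the compose setup above).
import asyncio

from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client


async def main():
    async with streamablehttp_client("http://localhost:1975/mcp") as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.list_tools()
            # Print whatever the configured MCP backends (here: kiwi) advertise.
            print([tool.name for tool in result.tools])


asyncio.run(main())
```

If the gateway picks up the MCP configuration, this should print the Kiwi
flight-search tool names.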

## Notes

Here are some constraints about the Envoy AI Gateway implementation:
22 changes: 19 additions & 3 deletions inference-platforms/aigw/docker-compose.yml
@@ -1,3 +1,16 @@
configs:
  # MCP servers configuration for aigw
  mcp-config:
    content: |
      {
        "mcpServers": {
          "kiwi": {
            "type": "http",
            "url": "https://mcp.kiwi.com"
          }
        }
      }

services:
ollama-pull:
image: alpine/ollama
@@ -24,7 +37,10 @@ services:
      - OPENAI_BASE_URL=http://host.docker.internal:11434/v1
      - OTEL_EXPORTER_OTLP_ENDPOINT=http://host.docker.internal:4318
    ports:
      - "1975:1975" # OpenAI compatible endpoint at /v1
    extra_hosts: # localhost:host-gateway trick doesn't work with aigw
      - "1975:1975" # OpenAI compatible endpoint at /v1, MCP server at /mcp
    configs:
      - source: mcp-config
        target: /etc/aigw/mcp-servers.json
    extra_hosts: # localhost:host-gateway trick doesn't work with aigw
      - "host.docker.internal:host-gateway"
    command: ["run", "/config.yaml"]
    command: ["run", "--mcp-config", "/etc/aigw/mcp-servers.json"]
3 changes: 2 additions & 1 deletion inference-platforms/aigw/env.local
@@ -1,7 +1,8 @@
# Override default ENV variables for Ollama
OPENAI_BASE_URL=http://localhost:11434/v1
OPENAI_API_KEY=unused
CHAT_MODEL=qwen3:0.6B
CHAT_MODEL=qwen3:0.6b
AGENT_MODEL=qwen3:1.7b

# OpenTelemetry configuration
OTEL_SERVICE_NAME=aigw
10 changes: 7 additions & 3 deletions inference-platforms/chat.py
@@ -1,3 +1,4 @@
# run like this: uv run --exact -q --env-file .env chat.py
# /// script
# dependencies = [
# "openai",
@@ -6,13 +7,16 @@
# "opentelemetry-instrumentation-httpx"
# ]
# ///
# ruff: noqa: E402
from opentelemetry.instrumentation import auto_instrumentation

# This must precede any other imports you want to instrument!
auto_instrumentation.initialize()

import argparse
import os

import openai
from opentelemetry.instrumentation import auto_instrumentation

auto_instrumentation.initialize()

model = os.getenv("CHAT_MODEL", "gpt-4o-mini")
