diff --git a/inference-platforms/aigw/README.md b/inference-platforms/aigw/README.md
index dac8162..b6e27d2 100644
--- a/inference-platforms/aigw/README.md
+++ b/inference-platforms/aigw/README.md
@@ -3,8 +3,10 @@
 This shows how to use [Envoy AI Gateway][docs] to proxy Ollama, accessible via
 an OpenAI compatible API.
 
-Envoy AI Gateway [YAML configuration](ai-gateway-local.yaml) is processed and run
-by `aigw`, which launches an Envoy proxy to handle requests. OpenTelemetry support
+Envoy AI Gateway is automatically configured by OpenAI and OpenTelemetry
+environment variables read by `aigw run`, such as `OPENAI_API_KEY`.
+
+`aigw run` launches an Envoy proxy to handle requests. OpenTelemetry support
 for GenAI metrics and traces is handled directly in the `aigw` (go) binary.
 OpenTelemetry traces produced by Envoy AI Gateway follow the [OpenInference
 specification][openinference].
@@ -32,7 +34,7 @@ Once Envoy AI Gateway is running, use [uv][uv] to make an OpenAI request via
 [chat.py](../chat.py):
 
 ```bash
-uv run --exact -q --env-file env.local ../chat.py
+OPENAI_BASE_URL=http://localhost:1975/v1 uv run --exact -q --env-file env.local ../chat.py
 ```
 
 ## Notes
diff --git a/inference-platforms/aigw/ai-gateway-local.yaml b/inference-platforms/aigw/ai-gateway-local.yaml
deleted file mode 100644
index a20e1aa..0000000
--- a/inference-platforms/aigw/ai-gateway-local.yaml
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright Envoy AI Gateway Authors
-# SPDX-License-Identifier: Apache-2.0
-# The full text of the Apache license is available in the LICENSE file at
-# the root of the repo.
-
-# Configuration for running Envoy AI Gateway with an OpenAI compatible
-# inference platform, defaulting to Ollama.
-#
-# Override with the OPENAI_HOST and OPENAI_PORT environment variables.
-# For example, to use RamaLama you would set OPENAI_PORT=8080.
-#
-# TODO: Remove after https://github.com/envoyproxy/ai-gateway/issues/1211
-apiVersion: gateway.networking.k8s.io/v1
-kind: GatewayClass
-metadata:
-  name: aigw-run
-spec:
-  controllerName: gateway.envoyproxy.io/gatewayclass-controller
----
-apiVersion: gateway.networking.k8s.io/v1
-kind: Gateway
-metadata:
-  name: aigw-run
-  namespace: default
-spec:
-  gatewayClassName: aigw-run
-  listeners:
-    - name: http
-      protocol: HTTP
-      port: 1975
-  infrastructure:
-    parametersRef:
-      group: gateway.envoyproxy.io
-      kind: EnvoyProxy
-      name: envoy-ai-gateway
----
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: EnvoyProxy
-metadata:
-  name: envoy-ai-gateway
-  namespace: default
-spec:
-  logging:
-    level:
-      default: error
----
-apiVersion: aigateway.envoyproxy.io/v1alpha1
-kind: AIGatewayRoute
-metadata:
-  name: aigw-run
-  namespace: default
-spec:
-  parentRefs:
-    - name: aigw-run
-      kind: Gateway
-      group: gateway.networking.k8s.io
-  # Simple rule: route everything to Ollama
-  rules:
-    - matches:
-        - headers:
-            - type: RegularExpression
-              name: x-ai-eg-model
-              value: .*
-      backendRefs:
-        - name: ollama
-          namespace: default
-      timeouts:
-        request: 120s
----
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: Backend
-metadata:
-  name: ollama
-  namespace: default
-spec:
-  endpoints:
-    # Use fqdn, not ip, to allow changing to host.docker.internal
-    - fqdn:
-        hostname: ${OPENAI_HOST:=127.0.0.1.nip.io} # Resolves to 127.0.0.1
-        port: ${OPENAI_PORT:=11434} # Default to Ollama's port
----
-apiVersion: aigateway.envoyproxy.io/v1alpha1
-kind: AIServiceBackend
-metadata:
-  name: ollama
-  namespace: default
-spec:
-  timeouts:
-    request: 3m
-  schema:
-    name: OpenAI
-  backendRef:
-    name: ollama
-    kind: Backend
-    group: gateway.envoyproxy.io
-    namespace: default
----
-# By default, Envoy Gateway sets the buffer limit to 32kiB which is not
-# sufficient for AI workloads. This ClientTrafficPolicy sets the buffer limit
-# to 50MiB as an example.
-# TODO: Remove ofter https://github.com/envoyproxy/ai-gateway/issues/1212
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: ClientTrafficPolicy
-metadata:
-  name: client-buffer-limit
-  namespace: default
-spec:
-  targetRefs:
-    - group: gateway.networking.k8s.io
-      kind: Gateway
-      name: aigw-run
-  connection:
-    bufferLimit: 50Mi
diff --git a/inference-platforms/aigw/docker-compose.yml b/inference-platforms/aigw/docker-compose.yml
index 82131d9..250781c 100644
--- a/inference-platforms/aigw/docker-compose.yml
+++ b/inference-platforms/aigw/docker-compose.yml
@@ -21,12 +21,10 @@ services:
     env_file:
       - env.local
     environment:
-      - OPENAI_HOST=host.docker.internal
+      - OPENAI_BASE_URL=http://host.docker.internal:11434/v1
       - OTEL_EXPORTER_OTLP_ENDPOINT=http://host.docker.internal:4318
     ports:
       - "1975:1975" # OpenAI compatible endpoint at /v1
     extra_hosts: # localhost:host-gateway trick doesn't work with aigw
       - "host.docker.internal:host-gateway"
-    volumes:
-      - ./ai-gateway-local.yaml:/config.yaml:ro
     command: ["run", "/config.yaml"]
diff --git a/inference-platforms/aigw/env.local b/inference-platforms/aigw/env.local
index 933b0c6..699753e 100644
--- a/inference-platforms/aigw/env.local
+++ b/inference-platforms/aigw/env.local
@@ -1,4 +1,5 @@
-OPENAI_BASE_URL=http://localhost:1975/v1
+# Override default ENV variables for Ollama
+OPENAI_BASE_URL=http://localhost:11434/v1
 OPENAI_API_KEY=unused
 CHAT_MODEL=qwen3:0.6B
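
Taken together, the changes swap the file-based YAML configuration for environment variables. A rough end-to-end check, assuming Ollama is already serving `qwen3:0.6B` on its default port 11434, might look like the sketch below, built from the compose file and the README command above:

```bash
# Start the gateway defined in docker-compose.yml; it reads env.local plus the
# OPENAI_BASE_URL / OTEL_EXPORTER_OTLP_ENDPOINT overrides set in the compose file.
docker compose up -d

# Send a chat request through the gateway's OpenAI-compatible endpoint on :1975,
# overriding the Ollama URL from env.local (same command as in the README).
OPENAI_BASE_URL=http://localhost:1975/v1 uv run --exact -q --env-file env.local ../chat.py
```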