diff --git a/inference-platforms/archgw/README.md b/inference-platforms/archgw/README.md
index fd327b2..08c6612 100644
--- a/inference-platforms/archgw/README.md
+++ b/inference-platforms/archgw/README.md
@@ -64,7 +64,11 @@ and anything added in Arch Gateway's [wasm filter][archgw-wasm].
   instructions to run from Docker (to avoid nested docker).
 * Traces come from Envoy, whose configuration is written by `archgw`. At the
   moment, this hard-codes aspects including default ports.
+* Prometheus metrics show the cluster as "ollama_host" - the provider_interface
+  plus the first segment of the hostname (dots truncate the rest). The "host"
+  comes from "host.docker.internal".
 * Until [this][openai-responses] resolves, don't use `--use-responses-api`.
+* Until [this][docker-env] resolves, make sure your PATH has /usr/local/bin.
 
 The chat prompt was designed to be idempotent, but the results are not. You
 may see something besides 'South Atlantic Ocean.'.
@@ -78,3 +82,4 @@ Just run it again until we find a way to make the results idempotent.
 [uv]: https://docs.astral.sh/uv/getting-started/installation/
 [openai-responses]: https://github.com/katanemo/archgw/issues/476
 [otel-tui]: https://github.com/ymtdzzz/otel-tui
+[docker-env]: https://github.com/katanemo/archgw/issues/573
diff --git a/inference-platforms/archgw/arch_config.yaml b/inference-platforms/archgw/arch_config.yaml
index cf6dabd..da6238a 100644
--- a/inference-platforms/archgw/arch_config.yaml
+++ b/inference-platforms/archgw/arch_config.yaml
@@ -8,8 +8,9 @@ listeners:
     timeout: 30s
 
 llm_providers:
+  # Use ollama directly, since we can't inherit OPENAI_BASE_URL etc and need
+  # to hard-code the model anyway.
   - model: ollama/qwen3:0.6b
-    provider_interface: openai
     # This configuration is converted to Envoy and run inside Docker.
     base_url: http://host.docker.internal:11434
     default: true
diff --git a/inference-platforms/archgw/docker-compose-elastic.yml b/inference-platforms/archgw/docker-compose-elastic.yml
index 8f04c2c..892faf8 100644
--- a/inference-platforms/archgw/docker-compose-elastic.yml
+++ b/inference-platforms/archgw/docker-compose-elastic.yml
@@ -2,8 +2,7 @@ configs:
   # Configuration is simplified from archgw here:
   # https://github.com/katanemo/archgw/blob/main/docs/source/guides/observability/monitoring.rst
   #
-  # Note: The prometheus cluster name for qwen3:0.65b will shows up as '6b'
-  # See https://github.com/katanemo/archgw/issues/504
+  # Note: The cluster name for ollama + host.docker.internal = ollama_host
   prometheus-pump-config:
     content: |
       receivers:
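
To make the new README note concrete, here is a minimal Python sketch of the
naming rule it describes, assuming the cluster name is built from the model's
provider prefix (`ollama` in `ollama/qwen3:0.6b`) plus the first dot-separated
segment of the `base_url` host. The `cluster_name` function is hypothetical,
for illustration only, and is not archgw's actual implementation.

```python
# Illustration only: the cluster-naming rule the README note describes,
# NOT archgw's actual code. Assumes provider prefix + first host segment.
from urllib.parse import urlparse


def cluster_name(model: str, base_url: str) -> str:
    provider = model.split("/", 1)[0]          # "ollama/qwen3:0.6b" -> "ollama"
    host = urlparse(base_url).hostname or ""   # -> "host.docker.internal"
    first_segment = host.split(".", 1)[0]      # dots truncate the rest -> "host"
    return f"{provider}_{first_segment}"


# Matches the note: Prometheus shows the cluster as "ollama_host".
assert cluster_name("ollama/qwen3:0.6b",
                    "http://host.docker.internal:11434") == "ollama_host"
```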