archgw: address drift in prometheus cluster name (#87)

codefromthecrypt · web-flow · commit 36dcfe6b34bd · 2025-09-24T14:12:26.000+09:00
Signed-off-by: Adrian Cole <adrian@tetrate.io>
diff --git a/inference-platforms/archgw/README.md b/inference-platforms/archgw/README.md
@@ -64,7 +64,11 @@ and anything added in Arch Gateway's [wasm filter][archgw-wasm].
   instructions to run from Docker (to avoid nested docker).
 * Traces come from Envoy, whose configuration is written by `archgw`. At the
   moment, this hard-codes aspects including default ports.
+* Prometheus metrics show the cluster as "ollama_host": the provider_interface
+  plus the first segment of the hostname, since everything after the first dot
+  is truncated. Here, "host" comes from "host.docker.internal".
 * Until [this][openai-responses] resolves, don't use `--use-responses-api`.
+* Until [this][docker-env] resolves, make sure your PATH has /usr/local/bin.
 
 The chat prompt was designed to be idempotent, but the results are not. You may
 see something besides 'South Atlantic Ocean.'.
@@ -78,3 +82,4 @@ Just run it again until we find a way to make the results idempotent.
 [uv]: https://docs.astral.sh/uv/getting-started/installation/
 [openai-responses]: https://github.com/katanemo/archgw/issues/476
 [otel-tui]: https://github.com/ymtdzzz/otel-tui
+[docker-env]: https://github.com/katanemo/archgw/issues/573
diff --git a/inference-platforms/archgw/arch_config.yaml b/inference-platforms/archgw/arch_config.yaml
@@ -8,8 +8,9 @@ listeners:
     timeout: 30s
 
 llm_providers:
+  # Use ollama directly, since we can't inherit OPENAI_BASE_URL etc. and need
+  # to hard-code the model anyway.
   - model: ollama/qwen3:0.6b
-    provider_interface: openai
     # This configuration is converted to Envoy and run inside Docker.
     base_url: http://host.docker.internal:11434
     default: true
diff --git a/inference-platforms/archgw/docker-compose-elastic.yml b/inference-platforms/archgw/docker-compose-elastic.yml
@@ -2,8 +2,7 @@ configs:
   # Configuration is simplified from archgw here:
   # https://github.com/katanemo/archgw/blob/main/docs/source/guides/observability/monitoring.rst
   #
-  # Note: The prometheus cluster name for qwen3:0.65b will shows up as '6b'
-  # See https://github.com/katanemo/archgw/issues/504
+  # Note: With ollama and host.docker.internal, the cluster name is ollama_host
   prometheus-pump-config:
     content: |
       receivers: