From 2624a3aee112426d6ce904d82b4d2e7061cc0040 Mon Sep 17 00:00:00 2001
From: Adrian Cole
Date: Mon, 8 Sep 2025 11:14:26 +0800
Subject: [PATCH 1/2] archgw: bump to latest

Signed-off-by: Adrian Cole
---
 inference-platforms/archgw/README.md        | 12 ++++++++++--
 inference-platforms/archgw/arch_config.yaml |  7 +++----
 .../archgw/docker-compose-elastic.yml       |  3 +--
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/inference-platforms/archgw/README.md b/inference-platforms/archgw/README.md
index 36e0ffb..65ac136 100644
--- a/inference-platforms/archgw/README.md
+++ b/inference-platforms/archgw/README.md
@@ -14,10 +14,17 @@ Start ollama and the otel collector via this repository's [README](../../README.
 ## Run Arch Gateway
 
 Arch Gateway is a python command that internally runs Docker. Hence, you need a
-working Docker configuration. Run `archgw` using `uvx` from [uv][uv].
+working Docker configuration. Run `archgw` using `uv run` from [uv][uv] to ensure
+Python is available:
 
 ```bash
-uvx archgw up --service archgw --foreground
+uv run --with archgw -- archgw up arch_config.yaml
+```
+
+When finished, clean up like this:
+
+```bash
+uv run --with archgw -- archgw down
 ```
 
 ## Start Prometheus Scraping
@@ -71,3 +78,4 @@ Just run it again until we find a way to make the results idempotent.
 [archgw-wasm]: https://github.com/katanemo/archgw/blob/main/arch/README.md
 [uv]: https://docs.astral.sh/uv/getting-started/installation/
 [openai-responses]: https://github.com/katanemo/archgw/issues/476
+[otel-tui]: https://github.com/ymtdzzz/otel-tui
diff --git a/inference-platforms/archgw/arch_config.yaml b/inference-platforms/archgw/arch_config.yaml
index 5a3f623..cf6dabd 100644
--- a/inference-platforms/archgw/arch_config.yaml
+++ b/inference-platforms/archgw/arch_config.yaml
@@ -1,4 +1,4 @@
-version: "0.1-beta"
+version: v0.1.0
 
 listeners:
   egress_traffic:
@@ -8,11 +8,10 @@ listeners:
     timeout: 30s
 
 llm_providers:
-  - name: qwen3:0.6b
+  - model: ollama/qwen3:0.6b
     provider_interface: openai
     # This configuration is converted to Envoy and run inside Docker.
-    endpoint: host.docker.internal:11434
-    model: qwen3:0.6b
+    base_url: http://host.docker.internal:11434
     default: true
 
 tracing:
diff --git a/inference-platforms/archgw/docker-compose-elastic.yml b/inference-platforms/archgw/docker-compose-elastic.yml
index ab78648..8f04c2c 100644
--- a/inference-platforms/archgw/docker-compose-elastic.yml
+++ b/inference-platforms/archgw/docker-compose-elastic.yml
@@ -48,8 +48,7 @@ configs:
 services:
   # prometheus-pump is an OpenTelemetry Collector that scrapes Prometheus metrics
   prometheus-pump:
-    # TODO: docker.elastic.co/elastic-agent/elastic-otel-collector:9.0.3
-    image: docker.elastic.co/elastic-agent/elastic-otel-collector:9.0.3-SNAPSHOT
+    image: docker.elastic.co/elastic-agent/elastic-otel-collector:9.1.3
     container_name: prometheus-pump
     command: [
       "--config=/etc/otel/config.yaml",

From 93861e0c48504773c6cffbc7ad57d6d64aeb9dac Mon Sep 17 00:00:00 2001
From: Adrian Cole <64215+codefromthecrypt@users.noreply.github.com>
Date: Mon, 8 Sep 2025 14:06:29 +0800
Subject: [PATCH 2/2] Update inference-platforms/archgw/README.md

Co-authored-by: Anuraag (Rag) Agrawal
---
 inference-platforms/archgw/README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/inference-platforms/archgw/README.md b/inference-platforms/archgw/README.md
index 65ac136..fd327b2 100644
--- a/inference-platforms/archgw/README.md
+++ b/inference-platforms/archgw/README.md
@@ -14,8 +14,7 @@ Start ollama and the otel collector via this repository's [README](../../README.
 ## Run Arch Gateway
 
 Arch Gateway is a python command that internally runs Docker. Hence, you need a
-working Docker configuration. Run `archgw` using `uv run` from [uv][uv] to ensure
-Python is available:
+working Docker configuration. Run `archgw` using `uv run` from [uv][uv]:
 
 ```bash
 uv run --with archgw -- archgw up arch_config.yaml
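A quick way to exercise the configuration these patches converge on is to bring the gateway up with the patched command and send one chat completion through its OpenAI-compatible egress listener. This is a minimal sketch, not part of the patch series: it assumes archgw's documented default egress port of 12000 and that the request names the model exactly as configured (`ollama/qwen3:0.6b`); adjust both to match your `arch_config.yaml`.

```bash
# Start the gateway with the updated config (command from the patched README).
uv run --with archgw -- archgw up arch_config.yaml

# Assumption: the egress listener is on the default port 12000; change this to
# whatever is configured under listeners.egress_traffic in arch_config.yaml.
curl -s http://localhost:12000/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{
        "model": "ollama/qwen3:0.6b",
        "messages": [{"role": "user", "content": "Say hello in one word."}]
      }'

# Tear down when finished, as in the patched README.
uv run --with archgw -- archgw down
```

Since the provider is marked `default: true` in the patched config, the gateway is expected to fall back to it when no other provider matches the requested model.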