katanemo · Spherrrical · Jun 24, 2026 · Jun 22, 2026
diff --git a/cli/planoai/obs/pricing.py b/cli/planoai/obs/pricing.py
@@ -1,7 +1,8 @@
-"""DigitalOcean Gradient pricing catalog for the obs console.
+"""Model pricing catalog for the obs console.
 
-Ported loosely from ``crates/brightstaff/src/router/model_metrics.rs::fetch_do_pricing``.
-Single-source: one fetch at startup, cached for the life of the process.
+Mirrors ``crates/brightstaff/src/router/model_metrics.rs``. The source is
+configurable: ``digitalocean`` (DO GenAI catalog) or ``models.dev``. A single
+fetch at startup is cached for the life of the process.
 """
 
 from __future__ import annotations
@@ -14,7 +15,18 @@
 
 import requests
 
-DEFAULT_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
+DO_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
+MODELS_DEV_URL = "https://models.dev/api.json"
+
+# Backwards-compatible defaults (DigitalOcean), used when no provider is given.
+DEFAULT_PRICING_URL = DO_PRICING_URL
+DEFAULT_PRICING_PROVIDER = "digitalocean"
+
+_DEFAULT_URLS: dict[str, str] = {  # provider name -> default catalog URL
+    "digitalocean": DO_PRICING_URL,
+    "models.dev": MODELS_DEV_URL,
+}
+
 FETCH_TIMEOUT_SECS = 5.0
 
 
@@ -51,36 +63,52 @@ def sample_models(self, n: int = 5) -> list[str]:
             return list(self._prices.keys())[:n]
 
     @classmethod
-    def fetch(cls, url: str = DEFAULT_PRICING_URL) -> "PricingCatalog":
-        """Fetch pricing from DO's catalog endpoint. On failure, returns an
+    def fetch(
+        cls,
+        provider: str = DEFAULT_PRICING_PROVIDER,
+        url: str | None = None,
+    ) -> "PricingCatalog":
+        """Fetch pricing from the configured catalog. On failure, returns an
         empty catalog (cost column will be blank).
 
-        The catalog endpoint is public — no auth required, no signup — so
-        ``planoai obs`` gets cost data on first run out of the box.
+        ``provider`` picks the parser/default URL (``digitalocean`` or ``models.dev``;
+        unknown names warn and use the default); ``url`` overrides the endpoint. Both
+        catalogs are public — no auth — so ``planoai obs`` has cost data on first run.
         """
+        provider = (provider or DEFAULT_PRICING_PROVIDER).strip().lower()
+        if provider not in _DEFAULT_URLS:
+            logger.warning("unknown pricing provider %r; using default.", provider)
+            provider = DEFAULT_PRICING_PROVIDER
+        resolved_url = url or _DEFAULT_URLS[provider]
         try:
-            resp = requests.get(url, timeout=FETCH_TIMEOUT_SECS)
+            resp = requests.get(resolved_url, timeout=FETCH_TIMEOUT_SECS)
             resp.raise_for_status()
             data = resp.json()
         except Exception as exc:  # noqa: BLE001 — best-effort; never fatal
             logger.warning(
-                "DO pricing fetch failed: %s; cost column will be blank.",
+                "%s pricing fetch failed: %s; cost column will be blank.",
+                provider,
                 exc,
             )
             return cls()
 
-        prices = _parse_do_pricing(data)
+        if provider == "models.dev":
+            prices = _parse_models_dev_pricing(data)
+        else:
+            prices = _parse_do_pricing(data)
         if not prices:
-            # Dump the first entry's raw shape so we can see which fields DO
-            # actually returned — helps when the catalog adds new fields or
-            # the response doesn't match our parser.
+            # Log a sample of the raw payload so a catalog/parser mismatch (new or
+            # renamed fields, unexpected shape) can be diagnosed from the warning.
             import json as _json
 
-            sample_items = _coerce_items(data)
-            sample = sample_items[0] if sample_items else data
+            if provider == "models.dev" and isinstance(data, dict):
+                sample = next(iter(data.values()), data)
+            else:
+                sample = next(iter(_coerce_items(data)), data)
             logger.warning(
-                "DO pricing response had no parseable entries; cost column "
+                "%s pricing response had no parseable entries; cost column "
                 "will be blank. Sample entry: %s",
+                provider,
                 _json.dumps(sample, default=str)[:400],
             )
         return cls(prices)
@@ -278,6 +306,75 @@ def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
     return prices
 
 
+def _parse_models_dev_pricing(data: Any) -> dict[str, ModelPrice]:
+    """Build a ModelPrice lookup from a models.dev ``api.json`` payload.
+
+    The payload is an object keyed by provider id; each provider carries a
+    ``models`` mapping whose entries may include a ``cost`` block::
+
+        {
+          "anthropic": {
+            "models": {
+              "claude-opus-4-5": {
+                "cost": {"input": 5, "output": 25, "cache_read": 0.5}
+              }
+            }
+          },
+          ...
+        }
+
+    ``cost.*`` values are USD per *million* tokens and are scaled to per-token
+    rates here. Entries lacking a numeric ``input`` or ``output`` cost are
+    skipped. Each kept model is stored as ``provider/model`` (Plano's routing
+    name); the lowercased composite, the bare model id and its lowercase form
+    are added only when those keys are still free.
+    """
+    per_million = 1_000_000
+    catalog: dict[str, ModelPrice] = {}
+    if not isinstance(data, dict):
+        return catalog
+
+    for vendor_id, vendor in data.items():
+        entries = vendor.get("models") if isinstance(vendor, dict) else None
+        if not isinstance(entries, dict):
+            continue
+        for name, entry in entries.items():
+            rates = entry.get("cost") if isinstance(entry, dict) else None
+            if not isinstance(rates, dict):
+                continue
+            in_rate = _as_float(rates.get("input"))
+            out_rate = _as_float(rates.get("output"))
+            if in_rate is None or out_rate is None:
+                continue
+            # Free (0/0) entries are dropped so cost shows `—`, not $0.0000.
+            if in_rate == out_rate == 0:
+                continue
+            cache_rate = _as_float(rates.get("cache_read"))
+            if cache_rate is not None:
+                cache_rate = cache_rate / per_million
+            entry_price = ModelPrice(
+                input_per_token_usd=in_rate / per_million,
+                output_per_token_usd=out_rate / per_million,
+                cached_input_per_token_usd=cache_rate,
+            )
+            # The composite key is authoritative and overwrites an earlier alias.
+            qualified = f"{vendor_id}/{name}"
+            catalog[qualified] = entry_price
+            # setdefault: an alias never displaces a key registered earlier.
+            for alias in (qualified.lower(), str(name), str(name).lower()):
+                catalog.setdefault(alias, entry_price)
+    return catalog
+
+
+def _as_float(value: Any) -> float | None:
+    """Return ``value`` as a finite float, or None when it cannot be one."""
+    try:
+        number = float(value)  # float(None) raises TypeError -> None below
+    except (TypeError, ValueError, OverflowError):  # OverflowError: huge ints
+        return None
+    return number if abs(number) < float("inf") else None  # drops inf and NaN
+
+
 def _coerce_items(data: Any) -> list[dict]:
     if isinstance(data, list):
         return [x for x in data if isinstance(x, dict)]

diff --git a/cli/planoai/obs_cmd.py b/cli/planoai/obs_cmd.py
@@ -2,9 +2,12 @@
 
 from __future__ import annotations
 
+import logging
+import os
 import time
 
 import rich_click as click
+import yaml
 from rich.console import Console
 from rich.live import Live
 
@@ -15,8 +18,50 @@
     LLMCallStore,
     ObsCollector,
 )
-from planoai.obs.pricing import PricingCatalog
+from planoai.obs.pricing import DEFAULT_PRICING_PROVIDER, PricingCatalog
 from planoai.obs.render import render
+from planoai.utils import find_config_file
+
+logger = logging.getLogger(__name__)
+
+
+def _resolve_pricing_source(
+    config_file: str | None,
+    provider_override: str | None,
+    url_override: str | None,
+) -> tuple[str, str | None]:
+    """Pick the cost pricing source as a ``(provider, url)`` pair.
+
+    Precedence: explicit CLI overrides > the first ``type: cost`` entry in
+    ``model_metrics_sources`` from the Plano config > the DigitalOcean default.
+    A config ``url`` is discarded when the CLI selects a different provider.
+    """
+    provider = DEFAULT_PRICING_PROVIDER
+    url: str | None = None
+
+    config_path = find_config_file(file=config_file)
+    if config_path and os.path.exists(config_path):
+        try:
+            with open(config_path, "r") as f:
+                config = yaml.safe_load(f) or {}
+            for source in config.get("model_metrics_sources") or []:
+                if isinstance(source, dict) and source.get("type") == "cost":
+                    provider = str(source.get("provider") or provider)
+                    url = str(source["url"]) if source.get("url") else None
+                    break
+        except Exception as exc:  # noqa: BLE001 — config is optional for obs
+            logger.warning(
+                "could not read pricing source from %s: %s", config_path, exc
+            )
+
+    if provider_override:
+        # The config URL serves the config's provider; another parser can't read it.
+        if provider_override.strip().lower() != provider.strip().lower():
+            url = None
+        provider = provider_override
+    if url_override:
+        url = url_override
+    return provider, url
 
 
 @click.command(name="obs", help="Live observability console for Plano LLM traffic.")
@@ -48,13 +93,42 @@
     show_default=True,
     help="TUI refresh interval.",
 )
-def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
+@click.option(
+    "--config",
+    "config_file",
+    type=str,
+    default=None,
+    help="Path to the Plano config to read the pricing source from "
+    "(defaults to ./config.yaml or ./plano_config.yaml).",
+)
+@click.option(
+    "--pricing-provider",
+    type=click.Choice(["digitalocean", "models.dev"]),
+    default=None,
+    help="Override the cost pricing provider (otherwise read from config).",
+)
+@click.option(
+    "--pricing-url",
+    type=str,
+    default=None,
+    help="Override the pricing catalog URL (otherwise read from config / provider default).",
+)
+def obs(
+    port: int,
+    host: str,
+    capacity: int,
+    refresh_ms: int,
+    config_file: str | None,
+    pricing_provider: str | None,
+    pricing_url: str | None,
+) -> None:
     console = Console()
+    provider, url = _resolve_pricing_source(config_file, pricing_provider, pricing_url)
     console.print(
-        f"[bold {PLANO_COLOR}]planoai obs[/] — loading DO pricing catalog...",
+        f"[bold {PLANO_COLOR}]planoai obs[/] — loading {provider} pricing catalog...",
         end="",
     )
-    pricing = PricingCatalog.fetch()
+    pricing = PricingCatalog.fetch(provider=provider, url=url)
     if len(pricing):
         sample = ", ".join(pricing.sample_models(3))
         console.print(
@@ -63,7 +137,7 @@ def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
     else:
         console.print(
             " [yellow]no pricing loaded[/] — "
-            "[dim]cost column will be blank (DO catalog unreachable)[/]"
+            f"[dim]cost column will be blank ({provider} catalog unreachable)[/]"
         )
 
     store = LLMCallStore(capacity=capacity)

diff --git a/cli/test/test_obs_pricing.py b/cli/test/test_obs_pricing.py
@@ -144,3 +144,68 @@ def test_parse_do_catalog_divides_large_values_as_per_million():
     prices = _parse_do_pricing(sample)
     assert prices["mystery-model"].input_per_token_usd == 5.0 / 1_000_000
     assert prices["mystery-model"].output_per_token_usd == 15.0 / 1_000_000
+
+
+_MODELS_DEV_SAMPLE = {  # trimmed models.dev payload: provider id -> {"models": ...}
+    "anthropic": {
+        "id": "anthropic",
+        "models": {
+            "claude-opus-4-5": {
+                "id": "claude-opus-4-5",
+                "cost": {"input": 5, "output": 25, "cache_read": 0.5},
+            }
+        },
+    },
+    "groq": {
+        "id": "groq",
+        "models": {
+            "llama-3.3-70b-versatile": {
+                "id": "llama-3.3-70b-versatile",
+                "cost": {"input": 0.59, "output": 0.79},
+            },
+            # No "cost" block, so the parser must leave this model out.
+            "whisper-large-v3-turbo": {"id": "whisper-large-v3-turbo"},
+        },
+    },
+}
+
+
+def test_parse_models_dev_composes_provider_keys_and_per_token_rates():
+    """Parsed models.dev rates are per-token and keyed by provider/model."""
+    from planoai.obs.pricing import _parse_models_dev_pricing
+
+    parsed = _parse_models_dev_pricing(_MODELS_DEV_SAMPLE)
+    claude = parsed["anthropic/claude-opus-4-5"]
+
+    # Catalog figures are USD per million tokens, hence the division by 1e6.
+    assert claude.input_per_token_usd == 5 / 1_000_000
+    assert claude.output_per_token_usd == 25 / 1_000_000
+    assert claude.cached_input_per_token_usd == 0.5 / 1_000_000
+
+    # Priced models appear under the composite routing name and the bare id.
+    assert "groq/llama-3.3-70b-versatile" in parsed
+    assert "llama-3.3-70b-versatile" in parsed
+    # An entry with no ``cost`` block never makes it into the map.
+    assert "groq/whisper-large-v3-turbo" not in parsed
+
+
+def test_models_dev_catalog_cost_computation():
+    from planoai.obs.pricing import PricingCatalog, _parse_models_dev_pricing
+
+    pricing = PricingCatalog(_parse_models_dev_pricing(_MODELS_DEV_SAMPLE))
+    call = _call("anthropic/claude-opus-4-5", 1000, 500)
+    # Expected: 1000 input @ 5e-6 (0.005) plus 500 output @ 25e-6 (0.0125).
+    assert pricing.cost_for_call(call) == round(0.005 + 0.0125, 6)
+
+
+def test_models_dev_skips_zero_rate_entries():
+    """A model priced at zero for both input and output is not registered."""
+    from planoai.obs.pricing import _parse_models_dev_pricing
+
+    zero_cost = {"input": 0, "output": 0}
+    payload = {"free": {"models": {"promo-model": {"cost": zero_cost}}}}
+
+    parsed = _parse_models_dev_pricing(payload)
+
+    # Nothing is kept: not the composite key, nor any bare-id alias.
+    assert parsed == {}
diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml
@@ -582,13 +582,17 @@ properties:
               type: string
               enum:
                 - digitalocean
+                - models.dev
+            url:
+              type: string
+              description: "Optional override for the pricing catalog endpoint. Defaults per provider (digitalocean: DO GenAI catalog; models.dev: https://models.dev/api.json)."
             refresh_interval:
               type: integer
               minimum: 1
               description: "Refresh interval in seconds"
             model_aliases:
               type: object
-              description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
+              description: "Map catalog keys to Plano model names used in routing_preferences. DigitalOcean keys are 'lowercase(creator)/model_id'; models.dev keys are 'provider_id/model_id' (e.g. 'groq/llama-3.3-70b-versatile'). Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
               additionalProperties:
                 type: string
           required: