Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 114 additions & 17 deletions cli/planoai/obs/pricing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""DigitalOcean Gradient pricing catalog for the obs console.
"""Model pricing catalog for the obs console.

Ported loosely from ``crates/brightstaff/src/router/model_metrics.rs::fetch_do_pricing``.
Single-source: one fetch at startup, cached for the life of the process.
Mirrors ``crates/brightstaff/src/router/model_metrics.rs``. The source is
configurable: ``digitalocean`` (DO GenAI catalog) or ``models.dev``. A single
fetch at startup is cached for the life of the process.
"""

from __future__ import annotations
Expand All @@ -14,7 +15,18 @@

import requests

DEFAULT_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
DO_PRICING_URL = "https://api.digitalocean.com/v2/gen-ai/models/catalog"
MODELS_DEV_URL = "https://models.dev/api.json"

# Backwards-compatible default (DigitalOcean) used when no provider is given.
DEFAULT_PRICING_URL = DO_PRICING_URL
DEFAULT_PRICING_PROVIDER = "digitalocean"

_DEFAULT_URLS = {
"digitalocean": DO_PRICING_URL,
"models.dev": MODELS_DEV_URL,
}

FETCH_TIMEOUT_SECS = 5.0


Expand Down Expand Up @@ -51,36 +63,52 @@ def sample_models(self, n: int = 5) -> list[str]:
return list(self._prices.keys())[:n]

@classmethod
def fetch(cls, url: str = DEFAULT_PRICING_URL) -> "PricingCatalog":
"""Fetch pricing from DO's catalog endpoint. On failure, returns an
def fetch(
cls,
provider: str = DEFAULT_PRICING_PROVIDER,
url: str | None = None,
) -> "PricingCatalog":
"""Fetch pricing from the configured catalog. On failure, returns an
empty catalog (cost column will be blank).

The catalog endpoint is public — no auth required, no signup — so
``planoai obs`` gets cost data on first run out of the box.
``provider`` selects the parser/default URL: ``digitalocean`` or
``models.dev``. Both catalog endpoints are public — no auth required —
so ``planoai obs`` gets cost data on first run out of the box.
"""
provider = (provider or DEFAULT_PRICING_PROVIDER).strip().lower()
resolved_url = url or _DEFAULT_URLS.get(provider, DO_PRICING_URL)
try:
resp = requests.get(url, timeout=FETCH_TIMEOUT_SECS)
resp = requests.get(resolved_url, timeout=FETCH_TIMEOUT_SECS)
resp.raise_for_status()
data = resp.json()
except Exception as exc: # noqa: BLE001 — best-effort; never fatal
logger.warning(
"DO pricing fetch failed: %s; cost column will be blank.",
"%s pricing fetch failed: %s; cost column will be blank.",
provider,
exc,
)
return cls()

prices = _parse_do_pricing(data)
if provider == "models.dev":
prices = _parse_models_dev_pricing(data)
else:
prices = _parse_do_pricing(data)

if not prices:
# Dump the first entry's raw shape so we can see which fields DO
# actually returned — helps when the catalog adds new fields or
# the response doesn't match our parser.
# Dump a sample of the raw shape so we can see which fields the
# catalog returned — helps when it adds new fields or the response
# doesn't match our parser.
import json as _json

sample_items = _coerce_items(data)
sample = sample_items[0] if sample_items else data
if provider == "models.dev" and isinstance(data, dict):
sample = next(iter(data.values()), data)
else:
sample_items = _coerce_items(data)
sample = sample_items[0] if sample_items else data
logger.warning(
"DO pricing response had no parseable entries; cost column "
"%s pricing response had no parseable entries; cost column "
"will be blank. Sample entry: %s",
provider,
_json.dumps(sample, default=str)[:400],
)
return cls(prices)
Expand Down Expand Up @@ -278,6 +306,75 @@ def _parse_do_pricing(data: Any) -> dict[str, ModelPrice]:
return prices


def _parse_models_dev_pricing(data: Any) -> dict[str, ModelPrice]:
    """Build a ``ModelPrice`` lookup from a models.dev ``api.json`` payload.

    The payload is a top-level object keyed by provider id; each provider
    carries a ``models`` object keyed by model id, and each model may carry a
    ``cost`` object::

        {
            "anthropic": {
                "models": {
                    "claude-opus-4-5": {
                        "cost": {"input": 5, "output": 25, "cache_read": 0.5}
                    }
                }
            },
            ...
        }

    ``cost`` values are USD per *million* tokens and are converted to
    per-token rates here. Every priced model is registered under
    ``provider/model`` (the form Plano's routing names use) and, as
    fallbacks that never displace an existing entry, under the lowercased
    composite, the bare model id, and the lowercased bare model id.

    Entries that are malformed, lack a usable ``input``/``output`` rate, or
    have both rates equal to zero are skipped. A non-dict payload yields an
    empty map.
    """
    catalog: dict[str, ModelPrice] = {}
    if not isinstance(data, dict):
        return catalog

    tokens_per_million = 1_000_000

    for vendor_id, vendor_entry in data.items():
        vendor_models = (
            vendor_entry.get("models") if isinstance(vendor_entry, dict) else None
        )
        if not isinstance(vendor_models, dict):
            continue

        for model_id, model_entry in vendor_models.items():
            rates = model_entry.get("cost") if isinstance(model_entry, dict) else None
            if not isinstance(rates, dict):
                continue

            in_rate = _as_float(rates.get("input"))
            out_rate = _as_float(rates.get("output"))
            if in_rate is None or out_rate is None:
                continue
            # A fully free entry is dropped so the cost column shows `—`
            # instead of $0.0000.
            if in_rate == 0 and out_rate == 0:
                continue

            cache_rate = _as_float(rates.get("cache_read"))
            if cache_rate is None:
                cached_per_token = None
            else:
                cached_per_token = cache_rate / tokens_per_million

            entry_price = ModelPrice(
                input_per_token_usd=in_rate / tokens_per_million,
                output_per_token_usd=out_rate / tokens_per_million,
                cached_input_per_token_usd=cached_per_token,
            )

            bare_id = str(model_id)
            qualified = f"{vendor_id}/{model_id}"
            # The exact composite key always wins; the aliases only fill gaps.
            catalog[qualified] = entry_price
            for alias in (qualified.lower(), bare_id, bare_id.lower()):
                catalog.setdefault(alias, entry_price)

    return catalog


def _as_float(value: Any) -> float | None:
if value is None:
return None
try:
return float(value)
except (TypeError, ValueError):
return None


def _coerce_items(data: Any) -> list[dict]:
if isinstance(data, list):
return [x for x in data if isinstance(x, dict)]
Expand Down
84 changes: 79 additions & 5 deletions cli/planoai/obs_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

from __future__ import annotations

import logging
import os
import time

import rich_click as click
import yaml
from rich.console import Console
from rich.live import Live

Expand All @@ -15,8 +18,50 @@
LLMCallStore,
ObsCollector,
)
from planoai.obs.pricing import PricingCatalog
from planoai.obs.pricing import DEFAULT_PRICING_PROVIDER, PricingCatalog
from planoai.obs.render import render
from planoai.utils import find_config_file

logger = logging.getLogger(__name__)


def _resolve_pricing_source(
    config_file: str | None,
    provider_override: str | None,
    url_override: str | None,
) -> tuple[str, str | None]:
    """Pick the cost pricing source.

    Precedence: explicit CLI overrides > the first ``type: cost`` entry in
    ``model_metrics_sources`` from the Plano config > the default provider
    (``DEFAULT_PRICING_PROVIDER``).

    Args:
        config_file: Optional config path, handed to ``find_config_file`` to
            locate the Plano config.
        provider_override: Provider name from the CLI; wins over the config
            when truthy.
        url_override: Catalog URL from the CLI; wins over the config when
            truthy.

    Returns:
        ``(provider, url)``. ``url`` is ``None`` when neither the config nor
        the CLI supplies one.

    Reading the config is best-effort: any failure is logged as a warning
    and the defaults (plus any CLI overrides) are used.
    """
    provider = DEFAULT_PRICING_PROVIDER
    url: str | None = None

    config_path = find_config_file(file=config_file)
    if config_path and os.path.exists(config_path):
        try:
            # Decode explicitly as UTF-8 rather than the locale default, so a
            # config with non-ASCII text parses the same on every platform.
            with open(config_path, "r", encoding="utf-8") as f:
                config = yaml.safe_load(f) or {}
            sources = config.get("model_metrics_sources") or []
            # Only the first `type: cost` entry is consulted.
            cost_source = next(
                (
                    source
                    for source in sources
                    if isinstance(source, dict) and source.get("type") == "cost"
                ),
                None,
            )
            if cost_source is not None:
                if cost_source.get("provider"):
                    provider = str(cost_source["provider"])
                if cost_source.get("url"):
                    url = str(cost_source["url"])
        except Exception as exc:  # noqa: BLE001 — config is optional for obs
            logger.warning(
                "could not read pricing source from %s: %s", config_path, exc
            )

    if provider_override:
        provider = provider_override
    if url_override:
        url = url_override

    return provider, url


@click.command(name="obs", help="Live observability console for Plano LLM traffic.")
Expand Down Expand Up @@ -48,13 +93,42 @@
show_default=True,
help="TUI refresh interval.",
)
def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
@click.option(
"--config",
"config_file",
type=str,
default=None,
help="Path to the Plano config to read the pricing source from "
"(defaults to ./config.yaml or ./plano_config.yaml).",
)
@click.option(
"--pricing-provider",
type=click.Choice(["digitalocean", "models.dev"]),
default=None,
help="Override the cost pricing provider (otherwise read from config).",
)
@click.option(
"--pricing-url",
type=str,
default=None,
help="Override the pricing catalog URL (otherwise read from config / provider default).",
)
def obs(
port: int,
host: str,
capacity: int,
refresh_ms: int,
config_file: str | None,
pricing_provider: str | None,
pricing_url: str | None,
) -> None:
console = Console()
provider, url = _resolve_pricing_source(config_file, pricing_provider, pricing_url)
console.print(
f"[bold {PLANO_COLOR}]planoai obs[/] — loading DO pricing catalog...",
f"[bold {PLANO_COLOR}]planoai obs[/] — loading {provider} pricing catalog...",
end="",
)
pricing = PricingCatalog.fetch()
pricing = PricingCatalog.fetch(provider=provider, url=url)
if len(pricing):
sample = ", ".join(pricing.sample_models(3))
console.print(
Expand All @@ -63,7 +137,7 @@ def obs(port: int, host: str, capacity: int, refresh_ms: int) -> None:
else:
console.print(
" [yellow]no pricing loaded[/] — "
"[dim]cost column will be blank (DO catalog unreachable)[/]"
f"[dim]cost column will be blank ({provider} catalog unreachable)[/]"
)

store = LLMCallStore(capacity=capacity)
Expand Down
65 changes: 65 additions & 0 deletions cli/test/test_obs_pricing.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,68 @@ def test_parse_do_catalog_divides_large_values_as_per_million():
prices = _parse_do_pricing(sample)
assert prices["mystery-model"].input_per_token_usd == 5.0 / 1_000_000
assert prices["mystery-model"].output_per_token_usd == 15.0 / 1_000_000


# Minimal fixture in the models.dev ``api.json`` shape: top-level keys are
# provider ids, each holding a ``models`` map whose entries may carry a
# ``cost`` block. Covers a model with a cache_read rate, a model with only
# input/output rates, and a model with no ``cost`` block at all.
_MODELS_DEV_SAMPLE = {
    "anthropic": {
        "id": "anthropic",
        "models": {
            "claude-opus-4-5": {
                "id": "claude-opus-4-5",
                "cost": {"input": 5, "output": 25, "cache_read": 0.5},
            }
        },
    },
    "groq": {
        "id": "groq",
        "models": {
            "llama-3.3-70b-versatile": {
                "id": "llama-3.3-70b-versatile",
                "cost": {"input": 0.59, "output": 0.79},
            },
            # No cost block → skipped.
            "whisper-large-v3-turbo": {"id": "whisper-large-v3-turbo"},
        },
    },
}


def test_parse_models_dev_composes_provider_keys_and_per_token_rates():
    """Parsed entries use ``provider/model`` keys and per-token USD rates."""
    from planoai.obs.pricing import _parse_models_dev_pricing

    parsed = _parse_models_dev_pricing(_MODELS_DEV_SAMPLE)

    # Catalog rates are quoted per million tokens, so each is scaled by 1e6.
    opus_price = parsed["anthropic/claude-opus-4-5"]
    assert opus_price.input_per_token_usd == 5 / 1_000_000
    assert opus_price.output_per_token_usd == 25 / 1_000_000
    assert opus_price.cached_input_per_token_usd == 0.5 / 1_000_000

    # Both the routing-style composite key and the bare model id resolve.
    for expected_key in ("groq/llama-3.3-70b-versatile", "llama-3.3-70b-versatile"):
        assert expected_key in parsed

    # An entry without a cost block never makes it into the map.
    assert "groq/whisper-large-v3-turbo" not in parsed


def test_models_dev_catalog_cost_computation():
    """A catalog built from models.dev data prices a call from its tokens."""
    from planoai.obs.pricing import PricingCatalog
    from planoai.obs.pricing import _parse_models_dev_pricing

    parsed = _parse_models_dev_pricing(_MODELS_DEV_SAMPLE)
    catalog = PricingCatalog(parsed)
    call = _call("anthropic/claude-opus-4-5", 1000, 500)

    # 1000 input tokens at 5e-6 -> 0.005; 500 output tokens at 25e-6 -> 0.0125.
    expected = round(0.005 + 0.0125, 6)
    assert catalog.cost_for_call(call) == expected


def test_models_dev_skips_zero_rate_entries():
    """A model whose input and output rates are both zero is not registered."""
    from planoai.obs.pricing import _parse_models_dev_pricing

    zero_cost = {"input": 0, "output": 0}
    payload = {"free": {"models": {"promo-model": {"cost": zero_cost}}}}

    parsed = _parse_models_dev_pricing(payload)
    assert parsed == {}
6 changes: 5 additions & 1 deletion config/plano_config_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -582,13 +582,17 @@ properties:
type: string
enum:
- digitalocean
- models.dev
url:
type: string
description: "Optional override for the pricing catalog endpoint. Defaults per provider (digitalocean: DO GenAI catalog; models.dev: https://models.dev/api.json)."
refresh_interval:
type: integer
minimum: 1
description: "Refresh interval in seconds"
model_aliases:
type: object
description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
description: "Map catalog keys to Plano model names used in routing_preferences. DigitalOcean keys are 'lowercase(creator)/model_id'; models.dev keys are 'creator/model_id'. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
additionalProperties:
type: string
required:
Expand Down
Loading
Loading