2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -5,7 +5,7 @@ repos:
- id: isort
args: ["--profile", "black"]
- repo: https://github.com/psf/black
rev: 23.3.0
rev: 24.10.0
hooks:
- id: black
args: ["--line-length", "100", "--preview"]
10 changes: 10 additions & 0 deletions aider/exceptions.py
@@ -92,6 +92,16 @@ def get_ex_info(self, ex):
" limiting your requests."
),
)
# Similar to the OpenRouter handling: detect Helicone gateway issues
if "helicone" in str(ex).lower():
return ExInfo(
"APIConnectionError",
True,
(
"Helicone or the upstream API provider is down, overloaded or rate"
" limiting your requests."
),
)

# Check for specific non-retryable APIError cases like insufficient credits
if ex.__class__ is litellm.APIError:
165 changes: 165 additions & 0 deletions aider/helicone.py
@@ -0,0 +1,165 @@
"""
Helicone model registry caching and lookup.

This module keeps a local cached copy of the Helicone public model registry
and exposes a helper class that returns metadata for a given model in a format
compatible with litellm’s get_model_info expectations.

Helicone models are addressed in aider as:
helicone/<registry-id>

Where <registry-id> typically looks like "openai/gpt-4o" or similar. This
module is conservative about costs (sets None when unknown) and focuses on
returning context limits and provider mapping.
"""

from __future__ import annotations

import json
import time
from pathlib import Path
from typing import Dict, List, Optional

import requests


class HeliconeModelManager:
DEFAULT_ENDPOINT = "https://jawn.helicone.ai/v1/public/model-registry/models"
CACHE_TTL = 60 * 60 * 24 # 24 h

def __init__(self) -> None:
self.cache_dir = Path.home() / ".aider" / "caches"
self.cache_file = self.cache_dir / "helicone_models.json"
self.content: Dict | None = None
self.verify_ssl: bool = True
self._cache_loaded = False

def set_verify_ssl(self, verify_ssl: bool) -> None:
self.verify_ssl = verify_ssl

# Public API ---------------------------------------------------------
def get_model_info(self, model: str) -> Dict:
"""
Return metadata for a model named like 'helicone/<registry-id>'.
Returns an empty dict for unknown models or on fetch failures.
"""
if not model.startswith("helicone/"):
return {}

self._ensure_content()
data = self._get_models_array()
if not data:
return {}

route = model[len("helicone/") :]

# Consider both the exact id and id without any ":suffix".
candidates = {route}
if ":" in route:
candidates.add(route.split(":", 1)[0])

record = next((m for m in data if m.get("id") in candidates), None)
if not record:
return {}

# Prefer endpoint provider if available, otherwise try to infer from id prefix
provider = None
endpoints = record.get("endpoints") or []
if endpoints:
endpoint0 = endpoints[0] or {}
provider = endpoint0.get("provider") or endpoint0.get("providerSlug")
if not provider:
# Infer provider from id like "openai/gpt-4o"
if "/" in record.get("id", ""):
provider = record["id"].split("/", 1)[0]

context_len = record.get("contextLength") or record.get("maxOutput") or None

# Helicone pricing schema may vary; set costs conservatively to None when unknown
pricing = (endpoints[0] or {}).get("pricing") if endpoints else None
input_cost = None
output_cost = None
if isinstance(pricing, dict):
# Some registries store per-token USD as float; otherwise leave None
try:
p = pricing.get("prompt")
input_cost = float(p) if p is not None else None
except Exception:
input_cost = None
try:
c = pricing.get("completion")
output_cost = float(c) if c is not None else None
except Exception:
output_cost = None

return {
"max_input_tokens": context_len,
"max_tokens": context_len,
"max_output_tokens": context_len,
"input_cost_per_token": input_cost,
"output_cost_per_token": output_cost,
# litellm_provider should be the underlying provider; consumers
# can still rewrite the name if routing via Helicone.
"litellm_provider": (provider or ""),
}

def get_all_model_ids(self) -> List[str]:
"""Return a list of all registry ids (without the 'helicone/' prefix)."""
self._ensure_content()
data = self._get_models_array()
if not data:
return []
out: List[str] = []
for m in data:
mid = m.get("id")
if isinstance(mid, str) and mid:
out.append(mid)
return out

# Internal helpers ---------------------------------------------------
def _get_models_array(self) -> Optional[List[Dict]]:
if not self.content:
return None
obj = self.content.get("data") or {}
arr = obj.get("models")
if isinstance(arr, list):
return arr
return None

def _ensure_content(self) -> None:
self._load_cache()
if not self.content:
self._update_cache()

def _load_cache(self) -> None:
if self._cache_loaded:
return
try:
self.cache_dir.mkdir(parents=True, exist_ok=True)
if self.cache_file.exists():
cache_age = time.time() - self.cache_file.stat().st_mtime
if cache_age < self.CACHE_TTL:
try:
self.content = json.loads(self.cache_file.read_text())
except json.JSONDecodeError:
self.content = None
except OSError:
# Cache directory might be unwritable; ignore.
pass
self._cache_loaded = True

def _update_cache(self) -> None:
try:
response = requests.get(self.DEFAULT_ENDPOINT, timeout=10, verify=self.verify_ssl)
if response.status_code == 200:
self.content = response.json()
try:
self.cache_file.write_text(json.dumps(self.content, indent=2))
except OSError:
pass
except Exception as ex: # noqa: BLE001
print(f"Failed to fetch Helicone model registry: {ex}")
try:
self.cache_file.write_text("{}")
except OSError:
pass
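
A minimal usage sketch of the manager added above (illustrative; assumes the Helicone registry endpoint is reachable and that the queried id exists in the registry):

```python
from aider.helicone import HeliconeModelManager

mgr = HeliconeModelManager()

# Metadata lookup for a registry id (hypothetical id shown); returns {} if unknown.
info = mgr.get_model_info("helicone/openai/gpt-4o")
print(info.get("max_input_tokens"), info.get("litellm_provider"))

# All ids known to the cached registry, without the "helicone/" prefix.
print(len(mgr.get_all_model_ids()), "models in the Helicone registry")
```
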
56 changes: 52 additions & 4 deletions aider/models.py
@@ -18,6 +18,7 @@

from aider import __version__
from aider.dump import dump # noqa: F401
from aider.helicone import HeliconeModelManager
from aider.llm import litellm
from aider.openrouter import OpenRouterModelManager
from aider.sendchat import ensure_alternating_roles, sanity_check_messages
@@ -157,11 +158,15 @@ def __init__(self):

# Manager for the cached OpenRouter model database
self.openrouter_manager = OpenRouterModelManager()
# Manager for the cached Helicone public registry
self.helicone_manager = HeliconeModelManager()

def set_verify_ssl(self, verify_ssl):
self.verify_ssl = verify_ssl
if hasattr(self, "openrouter_manager"):
self.openrouter_manager.set_verify_ssl(verify_ssl)
if hasattr(self, "helicone_manager"):
self.helicone_manager.set_verify_ssl(verify_ssl)

def _load_cache(self):
if self._cache_loaded:
@@ -242,6 +247,12 @@ def get_model_info(self, model):
if litellm_info:
return litellm_info

# Helicone models: consult the local Helicone registry cache
if model.startswith("helicone/"):
helicone_info = self.helicone_manager.get_model_info(model)
if helicone_info:
return helicone_info

if not cached_info and model.startswith("openrouter/"):
# First try using the locally cached OpenRouter model database
openrouter_info = self.openrouter_manager.get_model_info(model)
@@ -413,6 +424,10 @@ def configure_model_settings(self, model):
self.accepts_settings.append("reasoning_effort")

def apply_generic_model_settings(self, model):
# Normalize helicone models to underlying provider/model for rule matching
if model.startswith("helicone/"):
model = model[len("helicone/") :]

if "/o3-mini" in model:
self.edit_format = "diff"
self.use_repo_map = True
@@ -678,6 +693,13 @@ def fast_validate_environment(self):

model = self.name

# Special handling: helicone/<provider>/<model>
# Require HELICONE_API_KEY for Helicone gateway routing
if model.startswith("helicone/"):
if os.environ.get("HELICONE_API_KEY"):
return dict(keys_in_environment=["HELICONE_API_KEY"], missing_keys=[])
return dict(keys_in_environment=False, missing_keys=["HELICONE_API_KEY"])

pieces = model.split("/")
if len(pieces) > 1:
provider = pieces[0]
@@ -953,10 +975,27 @@ def send_completion(self, messages, functions, stream, temperature=None):
if self.is_deepseek_r1():
messages = ensure_alternating_roles(messages)

kwargs = dict(
model=self.name,
stream=stream,
)
effective_model = self.name
if effective_model.startswith("helicone/"):
# For Helicone, pass the bare model id (last segment of the registry id) to the gateway
underlying_id = effective_model[len("helicone/") :]
openai_model_id = underlying_id.split("/")[-1]
kwargs = dict(
model=openai_model_id,
stream=stream,
)
# Route via Helicone gateway and key
kwargs["api_base"] = "https://ai-gateway.helicone.ai"
helicone_key = os.environ.get("HELICONE_API_KEY")
if helicone_key:
kwargs["api_key"] = helicone_key
# Force OpenAI-compatible provider for litellm
kwargs["custom_llm_provider"] = "openai"
else:
kwargs = dict(
model=effective_model,
stream=stream,
)

if self.use_temperature is not False:
if temperature is None:
@@ -1212,6 +1251,15 @@ def fuzzy_match_models(name):
chat_models.add(fq_model)
chat_models.add(orig_model)

# Include Helicone registry models if available
try:
helicone_ids = model_info_manager.helicone_manager.get_all_model_ids()
for hid in helicone_ids:
chat_models.add(f"helicone/{hid}")
except Exception:
# Be resilient if the cache/network fails
pass

chat_models = sorted(chat_models)
# exactly matching model
# matching_models = [
3 changes: 2 additions & 1 deletion aider/website/docs/config/api-keys.md
@@ -71,6 +71,7 @@ is a great place to store your API keys and other provider API environment varia
```bash
GEMINI_API_KEY=foo
OPENROUTER_API_KEY=bar
HELICONE_API_KEY=xyz
DEEPSEEK_API_KEY=baz
```

@@ -85,6 +86,6 @@ via the `api-key` entry:
api-key:
- gemini=foo # Sets env var GEMINI_API_KEY=foo
- openrouter=bar # Sets env var OPENROUTER_API_KEY=bar
- helicone=xyz # Sets env var HELICONE_API_KEY=xyz
- deepseek=baz # Sets env var DEEPSEEK_API_KEY=baz
```

2 changes: 1 addition & 1 deletion aider/website/docs/llms/anthropic.md
@@ -37,7 +37,7 @@ aider --list-models anthropic/
{: .tip }
Anthropic has very low rate limits.
You can access all the Anthropic models via
[OpenRouter](openrouter.md)
[OpenRouter](openrouter.md), [Helicone](helicone.md),
or [Google Vertex AI](vertex.md)
with more generous rate limits.

3 changes: 3 additions & 0 deletions aider/website/docs/llms/deepseek.md
@@ -30,3 +30,6 @@ cd /to/your/project
aider --model deepseek/deepseek-chat
```

{: .tip }
You can also access DeepSeek models via the
[Helicone](helicone.md) gateway. Use the Helicone route, for example: `helicone/deepseek-v3`.
43 changes: 43 additions & 0 deletions aider/website/docs/llms/helicone.md
@@ -0,0 +1,43 @@
---
parent: Connecting to LLMs
nav_order: 505
---

# Helicone AI Gateway

Aider can connect to models via the Helicone gateway and public model registry.
You'll need a `HELICONE_API_KEY`.

First, install aider:

{% include install.md %}

Then configure your API key:

```
export HELICONE_API_KEY=<key> # Mac/Linux
setx HELICONE_API_KEY <key> # Windows, restart shell after setx
```

Start working with aider and Helicone on your codebase:

```bash
# Change directory into your codebase
cd /to/your/project

# Use any model id from the Helicone registry
aider --model helicone/<model-id>

# Example
aider --model helicone/gpt-4o

# List models available from Helicone
aider --list-models helicone/
```

## Notes
- Helicone acts as a gateway; aider routes requests through Helicone automatically when you use `helicone/...` models (see the sketch after this list).
- Aider requires only `HELICONE_API_KEY` to use Helicone models.
- Use the model id directly after the `helicone/` prefix (for example `helicone/gpt-4o`).
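
Under the hood, aider maps `helicone/<registry-id>` to the underlying model id and routes the request through the Helicone gateway. A minimal sketch of the equivalent litellm call, assuming `helicone/openai/gpt-4o` as the registry id:

```python
import os

import litellm

# Roughly what aider sends for `helicone/openai/gpt-4o` (illustrative sketch).
response = litellm.completion(
    model="gpt-4o",  # last segment of the registry id
    api_base="https://ai-gateway.helicone.ai",  # Helicone gateway
    api_key=os.environ["HELICONE_API_KEY"],
    custom_llm_provider="openai",  # the gateway speaks the OpenAI-compatible protocol
    messages=[{"role": "user", "content": "hello"}],
    stream=False,
)
print(response.choices[0].message.content)
```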

See [Advanced model settings](/docs/config/adv-model-settings.html#model-settings) for details on per‑model configuration.
5 changes: 5 additions & 0 deletions aider/website/docs/llms/openai.md
@@ -44,6 +44,11 @@ You can use `aider --model <model-name>` to use any other OpenAI model.
For example, if you want to use a specific version of GPT-4 Turbo
you could do `aider --model gpt-4-0125-preview`.

{: .tip }
You can also access OpenAI models via the
[Helicone](helicone.md) gateway. This can simplify multi‑provider setup and may
offer different rate limits.

## Reasoning models from other providers

Many of OpenAI's
5 changes: 3 additions & 2 deletions aider/website/docs/llms/xai.md
@@ -48,6 +48,7 @@ Example:
aider --model xai/grok-3-mini-beta --reasoning-effort high
```



{: .tip }
You can also access xAI Grok models via the
[Helicone](helicone.md) gateway using the model id, for example `helicone/grok-4`.
