Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions cli/test/test_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,3 +823,98 @@ def test_apply_kimi_code_provider_defaults_injects_user_agent():
apply_kimi_code_provider_defaults(provider)
assert provider["base_url"] == "https://api.kimi.com/coding/v1"
assert provider["headers"]["User-Agent"] == "KimiCLI/1.3"


def _load_schema():
    """Return jsonschema's ``validate`` callable and the parsed Plano config schema.

    The schema is read from ``../config/plano_config_schema.yaml``; the path is
    relative, so it is resolved against the current working directory.

    Returns:
        A ``(validate, schema)`` tuple, where ``schema`` is the object produced
        by ``yaml.safe_load`` for the schema file.
    """
    from jsonschema import validate

    with open("../config/plano_config_schema.yaml", "r") as schema_file:
        schema_doc = yaml.safe_load(schema_file.read())
    return validate, schema_doc


def test_schema_accepts_capabilities_and_signal_routing():
    """Capability/signal-aware routing fields validate against the schema.

    The config combines per-provider ``capabilities`` blocks, a top-level
    routing preference using the ``long_context_quality`` selection policy,
    ``overrides.empty_pool_behavior`` and ``model_capabilities_source``.
    """
    validate, schema = _load_schema()
    # NOTE(review): the nesting of this document was reconstructed from a view
    # that had lost its indentation. `model_capabilities_source` is assumed to
    # be a top-level key (sibling of `overrides`) — confirm against
    # plano_config_schema.yaml; it may instead belong under `overrides`.
    config = yaml.safe_load("""
version: v0.4.0
listeners:
  - type: model
    name: model_1
    address: 0.0.0.0
    port: 12000
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    default: true
  - model: anthropic/claude-opus-4-1-128k
    access_key: $ANTHROPIC_API_KEY
    capabilities:
      context_window: 128000
  - model: openai/llama-3.3-70b-vision
    base_url: https://api.custom-provider.com
    access_key: $CUSTOM_API_KEY
    capabilities:
      context_window: 128000
      supports_vision: true
      supports_image_generation: false
      supports_audio_out: false
      max_output_tokens: 8192
routing_preferences:
  - name: long document analysis
    description: summarizing or querying very large documents
    models:
      - anthropic/claude-opus-4-1-128k
      - openai/gpt-4o
    selection_policy:
      prefer: long_context_quality
overrides:
  llm_routing_model: Plano-Orchestrator
  empty_pool_behavior: warning
model_capabilities_source:
  url: https://models.dev/api.json
  refresh_interval: 86400
""")
    # validate() raises on any schema violation; returning normally is the pass condition.
    validate(config, schema)


def test_schema_rejects_unknown_capability_field():
    """capabilities block is closed (additionalProperties: false).

    An undeclared key nested inside a provider's ``capabilities`` mapping must
    make validation fail.
    """
    from jsonschema import ValidationError

    validate, schema = _load_schema()
    # `supports_telepathy` has to sit *inside* `capabilities` so the failure
    # comes from the closed capabilities object, not from an unknown key
    # elsewhere in the document.
    config = yaml.safe_load("""
version: v0.4.0
listeners:
  - type: model
    name: model_1
    address: 0.0.0.0
    port: 12000
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    capabilities:
      supports_telepathy: true
""")
    with pytest.raises(ValidationError):
        validate(config, schema)


def test_schema_rejects_invalid_empty_pool_behavior():
    """overrides.empty_pool_behavior only accepts the values in its enum.

    ``explode`` is not one of the allowed values, so validation must fail.
    """
    from jsonschema import ValidationError

    validate, schema = _load_schema()
    # `empty_pool_behavior` has to sit under `overrides` so the enum constraint
    # on that field is what gets exercised.
    config = yaml.safe_load("""
version: v0.4.0
listeners:
  - type: model
    name: model_1
    address: 0.0.0.0
    port: 12000
model_providers:
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
overrides:
  empty_pool_behavior: explode
""")
    with pytest.raises(ValidationError):
        validate(config, schema)
63 changes: 63 additions & 0 deletions config/plano_config_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -184,22 +184,54 @@ properties:
enum:
- plano
- claude
- anthropic
- deepseek
- groq
- mistral
- openai
- xiaomi
- gemini
- xai
- together_ai
- azure_openai
- ollama
- zhipu
- qwen
- amazon_bedrock
- chatgpt
- digitalocean
- vercel
- openrouter
- moonshotai
- astraflow
- astraflow_cn
headers:
type: object
additionalProperties:
type: string
description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
capabilities:
type: object
additionalProperties: false
description: "Optional model capability overrides. Capabilities default from models.dev (vendored seed + runtime refresh); declare these only to override models.dev or to describe a model models.dev doesn't have. Precedence: this block > models.dev > conservative default."
properties:
context_window:
type: integer
minimum: 1
description: "Maximum total context (input + output) tokens the model accepts."
max_output_tokens:
type: integer
minimum: 1
description: "Maximum output tokens the model can produce."
supports_vision:
type: boolean
description: "Whether the model accepts image input (vision)."
supports_image_generation:
type: boolean
description: "Whether the model serves /v1/images/generations."
supports_audio_out:
type: boolean
description: "Whether the model serves /v1/audio/speech (TTS)."
routing_preferences:
type: array
description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
Expand Down Expand Up @@ -243,17 +275,27 @@ properties:
enum:
- plano
- claude
- anthropic
- deepseek
- groq
- mistral
- openai
- xiaomi
- gemini
- xai
- together_ai
- azure_openai
- ollama
- zhipu
- qwen
- amazon_bedrock
- chatgpt
- digitalocean
- vercel
- openrouter
- moonshotai
- astraflow
- astraflow_cn
headers:
type: object
additionalProperties:
Expand Down Expand Up @@ -316,6 +358,26 @@ properties:
orchestrator_model_context_length:
type: integer
description: "Maximum token length for the orchestrator/routing model context window. Default is 8192."
empty_pool_behavior:
type: string
enum:
- error
- warning
default: error
description: "Tier 1 capability filtering is a hard gate. When it removes every model in a matched route, 'error' returns HTTP 422 (default); 'warning' logs and proceeds with the pre-filter pool. This is the only lever that lets routing_preferences win over capability."
model_capabilities_source:
type: object
additionalProperties: false
description: "Optional source for model capabilities, fetched at runtime from models.dev (like cost/latency metrics). Defaults to the public models.dev API; users rarely set this. Omit refresh_interval to fetch only once at startup."
properties:
url:
type: string
default: "https://models.dev/api.json"
description: "models.dev-compatible capability API URL."
refresh_interval:
type: integer
minimum: 1
description: "Refresh interval in seconds. Omit to fetch only once at startup."
system_prompt:
type: string
prompt_targets:
Expand Down Expand Up @@ -559,6 +621,7 @@ properties:
enum:
- cheapest
- fastest
- long_context_quality
- none
additionalProperties: false
required:
Expand Down
78 changes: 69 additions & 9 deletions crates/brightstaff/src/handlers/llm/model_selection.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use common::configuration::TopLevelRoutingPreference;
use common::configuration::{EmptyPoolBehavior, TopLevelRoutingPreference};
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use hermesllm::ProviderRequestType;
use hermesllm::{ProviderRequest, ProviderRequestType, RequiredCapabilities};
use hyper::StatusCode;
use std::sync::Arc;
use tracing::{debug, info, warn};

use crate::metrics as bs_metrics;
use crate::metrics::labels as metric_labels;
use crate::router::orchestrator::OrchestratorService;
use crate::router::orchestrator::{OrchestrationError, OrchestratorService};
use crate::streaming::truncate_message;
use crate::tracing::routing;

Expand Down Expand Up @@ -43,6 +43,15 @@ impl RoutingError {
status_code: StatusCode::INTERNAL_SERVER_ERROR,
}
}

/// Builds the error reported when the Tier 1 capability filter leaves no
/// viable model (D3 `error`).
///
/// The status is 422 Unprocessable Entity so the client can tell this is a
/// request/config mismatch rather than a server fault.
pub fn capability_error(message: String) -> Self {
    let status_code = StatusCode::UNPROCESSABLE_ENTITY;
    Self {
        message,
        status_code,
    }
}
}

/// Determines the routing decision if
Expand Down Expand Up @@ -109,6 +118,14 @@ pub async fn router_chat_get_upstream_model(
"processing router request"
);

// Tier 1: compute the capability requirements implied by this request shape.
let mut required = RequiredCapabilities::for_endpoint(request_path);
required.vision = chat_request.has_vision();
required.min_context_tokens = chat_request.required_context_tokens();
if !required.is_unconstrained() {
debug!(requirement = %required.describe(), "computed required capabilities");
}

// Capture start time for routing span
let routing_start_time = std::time::Instant::now();

Expand All @@ -117,6 +134,7 @@ pub async fn router_chat_get_upstream_model(
&chat_request.messages,
inline_routing_preferences,
request_id,
&required,
)
.await;

Expand Down Expand Up @@ -144,8 +162,42 @@ pub async fn router_chat_get_upstream_model(
})
}
None => {
// No route determined, return sentinel value "none"
// This signals to llm.rs to use the original validated request model
// No preference matched. The "none" sentinel tells llm.rs to use
// the original validated request model — but capability is a hard
// gate even here, so validate that explicit model against the
// request shape (catches e.g. an image sent to a text-only model).
let explicit_model = chat_request.model();
if !required.is_unconstrained()
&& orchestrator_service.has_capability_filter()
&& !orchestrator_service
.is_model_capable(explicit_model, &required)
.await
{
match orchestrator_service.empty_pool_behavior() {
EmptyPoolBehavior::Error => {
current_span.record("route.selected_model", "unknown");
bs_metrics::record_router_decision(
route_label,
"unknown",
true,
determination_elapsed,
);
return Err(RoutingError::capability_error(format!(
"model '{}' cannot serve this request (requires {})",
explicit_model,
required.describe()
)));
}
EmptyPoolBehavior::Warning => {
warn!(
model = %explicit_model,
requirement = %required.describe(),
"explicit model is not capable; empty_pool_behavior=warning, proceeding"
);
}
}
}

current_span.record("route.selected_model", "none");
info!("no route determined, using default model");
bs_metrics::record_router_decision(
Expand All @@ -165,10 +217,18 @@ pub async fn router_chat_get_upstream_model(
Err(err) => {
current_span.record("route.selected_model", "unknown");
bs_metrics::record_router_decision(route_label, "unknown", true, determination_elapsed);
Err(RoutingError::internal_error(format!(
"Failed to determine route: {}",
err
)))
match err {
OrchestrationError::CapabilityFilterEmpty { route, requirement } => {
Err(RoutingError::capability_error(format!(
"no capable model for route '{}': request requires {}",
route, requirement
)))
}
other => Err(RoutingError::internal_error(format!(
"Failed to determine route: {}",
other
))),
}
}
}
}
Loading
Loading