katanemo · Spherrrical · Jun 22, 2026
diff --git a/cli/test/test_config_generator.py b/cli/test/test_config_generator.py
@@ -823,3 +823,98 @@ def test_apply_kimi_code_provider_defaults_injects_user_agent():
     apply_kimi_code_provider_defaults(provider)
     assert provider["base_url"] == "https://api.kimi.com/coding/v1"
     assert provider["headers"]["User-Agent"] == "KimiCLI/1.3"
+
+
+def _load_schema():
+    from jsonschema import validate
+
+    with open("../config/plano_config_schema.yaml", "r") as f:
+        return validate, yaml.safe_load(f.read())
+
+
+def test_schema_accepts_capabilities_and_signal_routing():
+    """Capability/signal-aware routing fields validate against the schema."""
+    validate, schema = _load_schema()
+    config = yaml.safe_load("""
+version: v0.4.0
+listeners:
+  - type: model
+    name: model_1
+    address: 0.0.0.0
+    port: 12000
+model_providers:
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+    default: true
+  - model: anthropic/claude-opus-4-1-128k
+    access_key: $ANTHROPIC_API_KEY
+    capabilities:
+      context_window: 128000
+  - model: openai/llama-3.3-70b-vision
+    base_url: https://api.custom-provider.com
+    access_key: $CUSTOM_API_KEY
+    capabilities:
+      context_window: 128000
+      supports_vision: true
+      supports_image_generation: false
+      supports_audio_out: false
+      max_output_tokens: 8192
+routing_preferences:
+  - name: long document analysis
+    description: summarizing or querying very large documents
+    models:
+      - anthropic/claude-opus-4-1-128k
+      - openai/gpt-4o
+    selection_policy:
+      prefer: long_context_quality
+overrides:
+  llm_routing_model: Plano-Orchestrator
+  empty_pool_behavior: warning
+  model_capabilities_source:
+    url: https://models.dev/api.json
+    refresh_interval: 86400
+""")
+    validate(config, schema)
+
+
+def test_schema_rejects_unknown_capability_field():
+    """capabilities block is closed (additionalProperties: false)."""
+    from jsonschema import ValidationError
+
+    validate, schema = _load_schema()
+    config = yaml.safe_load("""
+version: v0.4.0
+listeners:
+  - type: model
+    name: model_1
+    address: 0.0.0.0
+    port: 12000
+model_providers:
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+    capabilities:
+      supports_telepathy: true
+""")
+    with pytest.raises(ValidationError):
+        validate(config, schema)
+
+
+def test_schema_rejects_invalid_empty_pool_behavior():
+    from jsonschema import ValidationError
+
+    validate, schema = _load_schema()
+    config = yaml.safe_load("""
+version: v0.4.0
+listeners:
+  - type: model
+    name: model_1
+    address: 0.0.0.0
+    port: 12000
+model_providers:
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+overrides:
+  empty_pool_behavior: explode
+""")
+    with pytest.raises(ValidationError):
+        validate(config, schema)
diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml
@@ -184,22 +184,54 @@ properties:
           enum:
             - plano
             - claude
+            - anthropic
             - deepseek
             - groq
             - mistral
             - openai
             - xiaomi
             - gemini
+            - xai
+            - together_ai
+            - azure_openai
+            - ollama
+            - zhipu
+            - qwen
+            - amazon_bedrock
             - chatgpt
             - digitalocean
             - vercel
             - openrouter
             - moonshotai
+            - astraflow
+            - astraflow_cn
         headers:
           type: object
           additionalProperties:
             type: string
           description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)."
+        capabilities:
+          type: object
+          additionalProperties: false
+          description: "Optional model capability overrides. Capabilities default from models.dev (vendored seed + runtime refresh); declare these only to override models.dev or to describe a model models.dev doesn't have. Precedence: this block > models.dev > conservative default."
+          properties:
+            context_window:
+              type: integer
+              minimum: 1
+              description: "Maximum total context (input + output) tokens the model accepts."
+            max_output_tokens:
+              type: integer
+              minimum: 1
+              description: "Maximum output tokens the model can produce."
+            supports_vision:
+              type: boolean
+              description: "Whether the model accepts image input (vision)."
+            supports_image_generation:
+              type: boolean
+              description: "Whether the model serves /v1/images/generations."
+            supports_audio_out:
+              type: boolean
+              description: "Whether the model serves /v1/audio/speech (TTS)."
         routing_preferences:
           type: array
           description: "[DEPRECATED] Inline routing_preferences under a model_provider are auto-migrated to the top-level routing_preferences list by the config generator. New configs should declare routing_preferences at the top level with an explicit models: [...] list. See docs/routing-api.md."
@@ -243,17 +275,27 @@ properties:
           enum:
             - plano
             - claude
+            - anthropic
             - deepseek
             - groq
             - mistral
             - openai
             - xiaomi
             - gemini
+            - xai
+            - together_ai
+            - azure_openai
+            - ollama
+            - zhipu
+            - qwen
+            - amazon_bedrock
             - chatgpt
             - digitalocean
             - vercel
             - openrouter
             - moonshotai
+            - astraflow
+            - astraflow_cn
         headers:
           type: object
           additionalProperties:
@@ -316,6 +358,26 @@ properties:
       orchestrator_model_context_length:
         type: integer
         description: "Maximum token length for the orchestrator/routing model context window. Default is 8192."
+      empty_pool_behavior:
+        type: string
+        enum:
+          - error
+          - warning
+        default: error
+        description: "Tier 1 capability filtering is a hard gate. When it removes every model in a matched route, 'error' returns HTTP 422 (default); 'warning' logs and proceeds with the pre-filter pool. This is the only lever that lets routing_preferences win over capability."
+      model_capabilities_source:
+        type: object
+        additionalProperties: false
+        description: "Optional source for model capabilities, fetched at runtime from models.dev (like cost/latency metrics). Defaults to the public models.dev API; users rarely set this. Omit refresh_interval to fetch only once at startup."
+        properties:
+          url:
+            type: string
+            default: "https://models.dev/api.json"
+            description: "models.dev-compatible capability API URL."
+          refresh_interval:
+            type: integer
+            minimum: 1
+            description: "Refresh interval in seconds. Omit to fetch only once at startup."
   system_prompt:
     type: string
   prompt_targets:
@@ -559,6 +621,7 @@ properties:
               enum:
                 - cheapest
                 - fastest
+                - long_context_quality
                 - none
           additionalProperties: false
           required:

diff --git a/crates/brightstaff/src/handlers/llm/model_selection.rs b/crates/brightstaff/src/handlers/llm/model_selection.rs
@@ -1,13 +1,13 @@
-use common::configuration::TopLevelRoutingPreference;
+use common::configuration::{EmptyPoolBehavior, TopLevelRoutingPreference};
 use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
-use hermesllm::ProviderRequestType;
+use hermesllm::{ProviderRequest, ProviderRequestType, RequiredCapabilities};
 use hyper::StatusCode;
 use std::sync::Arc;
 use tracing::{debug, info, warn};
 
 use crate::metrics as bs_metrics;
 use crate::metrics::labels as metric_labels;
-use crate::router::orchestrator::OrchestratorService;
+use crate::router::orchestrator::{OrchestrationError, OrchestratorService};
 use crate::streaming::truncate_message;
 use crate::tracing::routing;
 
@@ -43,6 +43,15 @@ impl RoutingError {
             status_code: StatusCode::INTERNAL_SERVER_ERROR,
         }
     }
+
+    /// Tier 1 capability filter left no viable model (D3 `error`). 422 so the
+    /// client can see this is a request/config mismatch, not a server fault.
+    pub fn capability_error(message: String) -> Self {
+        Self {
+            message,
+            status_code: StatusCode::UNPROCESSABLE_ENTITY,
+        }
+    }
 }
 
 /// Determines the routing decision if
@@ -109,6 +118,14 @@ pub async fn router_chat_get_upstream_model(
         "processing router request"
     );
 
+    // Tier 1: compute the capability requirements implied by this request shape.
+    let mut required = RequiredCapabilities::for_endpoint(request_path);
+    required.vision = chat_request.has_vision();
+    required.min_context_tokens = chat_request.required_context_tokens();
+    if !required.is_unconstrained() {
+        debug!(requirement = %required.describe(), "computed required capabilities");
+    }
+
     // Capture start time for routing span
     let routing_start_time = std::time::Instant::now();
 
@@ -117,6 +134,7 @@ pub async fn router_chat_get_upstream_model(
             &chat_request.messages,
             inline_routing_preferences,
             request_id,
+            &required,
         )
         .await;
 
@@ -144,8 +162,42 @@ pub async fn router_chat_get_upstream_model(
                 })
             }
             None => {
-                // No route determined, return sentinel value "none"
-                // This signals to llm.rs to use the original validated request model
+                // No preference matched. The "none" sentinel tells llm.rs to use
+                // the original validated request model — but capability is a hard
+                // gate even here, so validate that explicit model against the
+                // request shape (catches e.g. an image sent to a text-only model).
+                let explicit_model = chat_request.model();
+                if !required.is_unconstrained()
+                    && orchestrator_service.has_capability_filter()
+                    && !orchestrator_service
+                        .is_model_capable(explicit_model, &required)
+                        .await
+                {
+                    match orchestrator_service.empty_pool_behavior() {
+                        EmptyPoolBehavior::Error => {
+                            current_span.record("route.selected_model", "unknown");
+                            bs_metrics::record_router_decision(
+                                route_label,
+                                "unknown",
+                                true,
+                                determination_elapsed,
+                            );
+                            return Err(RoutingError::capability_error(format!(
+                                "model '{}' cannot serve this request (requires {})",
+                                explicit_model,
+                                required.describe()
+                            )));
+                        }
+                        EmptyPoolBehavior::Warning => {
+                            warn!(
+                                model = %explicit_model,
+                                requirement = %required.describe(),
+                                "explicit model is not capable; empty_pool_behavior=warning, proceeding"
+                            );
+                        }
+                    }
+                }
+
                 current_span.record("route.selected_model", "none");
                 info!("no route determined, using default model");
                 bs_metrics::record_router_decision(
@@ -165,10 +217,18 @@ pub async fn router_chat_get_upstream_model(
         Err(err) => {
             current_span.record("route.selected_model", "unknown");
             bs_metrics::record_router_decision(route_label, "unknown", true, determination_elapsed);
-            Err(RoutingError::internal_error(format!(
-                "Failed to determine route: {}",
-                err
-            )))
+            match err {
+                OrchestrationError::CapabilityFilterEmpty { route, requirement } => {
+                    Err(RoutingError::capability_error(format!(
+                        "no capable model for route '{}': request requires {}",
+                        route, requirement
+                    )))
+                }
+                other => Err(RoutingError::internal_error(format!(
+                    "Failed to determine route: {}",
+                    other
+                ))),
+            }
         }
     }
 }