chenyme · cloudriver8 · May 17, 2026 · May 17, 2026 · May 17, 2026 · May 20, 2026
diff --git a/app/control/account/invalid_credentials.py b/app/control/account/invalid_credentials.py
@@ -45,8 +45,7 @@ async def mark_account_invalid_credentials(
                     "expired_reason": reason,
                 },
             )
-        ]
-    )
+        ])
     logger.info(
         "account expired from {}: token={}... status={} upstream_status={}",
         source,
@@ -72,6 +71,12 @@ def feedback_kind_for_error(exc: BaseException | None) -> FeedbackKind:
     status = getattr(exc, "status", 0)
     if status == 429:
         return FeedbackKind.RATE_LIMITED
+    if status == 402:
+        # console.x.ai returns 402 when the account has exhausted its
+        # prepaid web_search/credit balance. Treat it like a rate limit
+        # so the account pool routes around this token until credits
+        # refresh or the operator tops up the balance.
+        return FeedbackKind.RATE_LIMITED
     if status == 401:
         return FeedbackKind.UNAUTHORIZED
     if status == 403:

diff --git a/app/control/account/state_machine.py b/app/control/account/state_machine.py
@@ -27,7 +27,6 @@ class StatePolicy:
 
 _DEFAULT_POLICY = StatePolicy()
 
-
 # ---------------------------------------------------------------------------
 # Feedback
 # ---------------------------------------------------------------------------
@@ -66,7 +65,9 @@ def from_status_code(
             kind = FeedbackKind.UNAUTHORIZED
         elif status_code == 403:
             kind = FeedbackKind.FORBIDDEN
-        elif status_code == 429:
+        elif status_code == 429 or status_code == 402:
+            # 402 from console.x.ai = account credits exhausted; treat as a
+            # rate-limited token so the pool routes around it.
             kind = FeedbackKind.RATE_LIMITED
         elif status_code >= 500:
             kind = FeedbackKind.SERVER_ERROR
@@ -111,9 +112,7 @@ def derive_status(record: AccountRecord, *, now: int | None = None) -> AccountSt
     return AccountStatus.COOLING
 
 
-def is_selectable(
-    record: AccountRecord, mode_id: int, *, now: int | None = None
-) -> bool:
+def is_selectable(record: AccountRecord, mode_id: int, *, now: int | None = None) -> bool:
     """Return True if the account can be selected for *mode_id*."""
     if record.is_deleted():
         return False
@@ -185,10 +184,7 @@ def apply_feedback(
         win = qs.get(feedback.mode_id)
         if win is not None:
             reset_at = (
-                ts + feedback.retry_after_ms
-                if feedback.retry_after_ms
-                else (ts + win.window_seconds * 1000)
-            )
+                ts + feedback.retry_after_ms if feedback.retry_after_ms else (ts + win.window_seconds * 1000))
             qs.set(
                 feedback.mode_id,
                 QuotaWindow(
@@ -206,10 +202,10 @@ def apply_feedback(
         use_count += 1
         last_use_at = ts
     elif feedback.kind not in (
-        FeedbackKind.SUCCESS,
-        FeedbackKind.RESTORE,
-        FeedbackKind.DISABLE,
-        FeedbackKind.DELETE,
+            FeedbackKind.SUCCESS,
+            FeedbackKind.RESTORE,
+            FeedbackKind.DISABLE,
+            FeedbackKind.DELETE,
     ):
         fail_count += 1
         last_fail_at = ts
@@ -236,11 +232,7 @@ def apply_feedback(
             ext[_DISABLED_REASON_KEY] = state_reason
 
     elif feedback.kind == FeedbackKind.RATE_LIMITED:
-        cooldown_ms = (
-            feedback.retry_after_ms
-            if feedback.retry_after_ms
-            else policy.default_cooling_ms
-        )
+        cooldown_ms = (feedback.retry_after_ms if feedback.retry_after_ms else policy.default_cooling_ms)
         status = AccountStatus.COOLING
         state_reason = feedback.reason or "rate_limited"
         ext[_COOLDOWN_UNTIL_KEY] = ts + cooldown_ms
@@ -292,21 +284,20 @@ def apply_feedback(
             "state_reason": state_reason,
             "ext": ext,
             "updated_at": ts,
-        }
-    )
+        })
 
 
 def clear_failures(record: AccountRecord) -> AccountRecord:
     """Reset failure counters and restore ACTIVE status."""
     ext = dict(record.ext)
     for k in (
-        _COOLDOWN_UNTIL_KEY,
-        _COOLDOWN_REASON_KEY,
-        _DISABLED_AT_KEY,
-        _DISABLED_REASON_KEY,
-        _EXPIRED_AT_KEY,
-        _EXPIRED_REASON_KEY,
-        _FORBIDDEN_STRIKE_KEY,
+            _COOLDOWN_UNTIL_KEY,
+            _COOLDOWN_REASON_KEY,
+            _DISABLED_AT_KEY,
+            _DISABLED_REASON_KEY,
+            _EXPIRED_AT_KEY,
+            _EXPIRED_REASON_KEY,
+            _FORBIDDEN_STRIKE_KEY,
     ):
         ext.pop(k, None)
     return record.model_copy(
@@ -318,8 +309,7 @@ def clear_failures(record: AccountRecord) -> AccountRecord:
             "state_reason": None,
             "ext": ext,
             "updated_at": now_ms(),
-        }
-    )
+        })
 
 
 __all__ = [

diff --git a/app/control/model/registry.py b/app/control/model/registry.py
@@ -36,6 +36,23 @@
     # Super+（basic 池不支持此模式）
     ModelSpec("grok-4.3-beta",                          ModeId.GROK_4_3, Tier.SUPER, Capability.CHAT,       True, "Grok 4.3 Beta"),
 
+    # === Console API (console.x.ai/v1/responses) ============================
+    # Calls console.x.ai directly with the SSO cookie, so basic accounts can use every model.
+    # Rate limits are enforced by console.x.ai (free tier: 1 rps / 60 RPM).
+    # Hybrid reasoning models default to effort="high" so callers that omit
+    # reasoning_effort still get the "think hard" experience the model name
+    # implies. Pass an explicit value (e.g. "minimal") to override.
+    ModelSpec("grok-4.3",                               ModeId.FAST, Tier.BASIC, Capability.CHAT,           True, "Grok 4.3 (Console)",                    console_model="grok-4.3",                       default_reasoning_effort="high"),
+    ModelSpec("grok-4",                                 ModeId.FAST, Tier.BASIC, Capability.CHAT,           True, "Grok 4 (Console)",                      console_model="grok-4",                         default_reasoning_effort="high"),
+    ModelSpec("grok-4.20",                              ModeId.FAST, Tier.BASIC, Capability.CHAT,           True, "Grok 4.20 (Console)",                   console_model="grok-4.20",                      default_reasoning_effort="high"),
+    # Fixed-intensity reasoning model — upstream rejects reasoning.effort.
+    ModelSpec("grok-4.20-reasoning",                    ModeId.FAST, Tier.BASIC, Capability.CHAT,           True, "Grok 4.20 Reasoning (Console)",         console_model="grok-4.20-0309-reasoning"),
+    # Non-reasoning model — effort is not applicable.
+    ModelSpec("grok-4.20-non-reasoning",                ModeId.FAST, Tier.BASIC, Capability.CHAT,           True, "Grok 4.20 Non-Reasoning (Console)",     console_model="grok-4.20-0309-non-reasoning"),
+    # Multi-agent — no default effort. This variant's handling of reasoning.effort
+    # has not been verified, so we don't auto-inject "high" to avoid surprising 400s.
+    ModelSpec("grok-4.20-multi-agent",                  ModeId.FAST, Tier.BASIC, Capability.CHAT,           True, "Grok 4.20 Multi-Agent (Console)",       console_model="grok-4.20-multi-agent-0309"),
+
     # === Image ==============================================================
 
     # Basic fast
@@ -66,7 +83,6 @@
 for _m in MODELS:
     _BY_CAP.setdefault(int(_m.capability), []).append(_m)
 
-
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------

diff --git a/app/control/model/spec.py b/app/control/model/spec.py
@@ -20,6 +20,23 @@ class ModelSpec:
     ``public_name`` is the human-readable display name.
     ``prefer_best`` when True, reverses pool priority to try higher-tier
                     pools first (hard priority, not soft preference).
+    ``console_model`` when non-empty, route this model through the
+                    ``console.x.ai/v1/responses`` endpoint instead of the
+                    ``grok.com`` web chat API. The string is the actual
+                    model ID sent to console.x.ai (e.g. ``"grok-4.3"``).
+                    SSO cookies from grok.com work for both endpoints,
+                    so basic-tier accounts can access all models this way.
+    ``default_reasoning_effort`` when non-empty, this value is forwarded as
+                    ``reasoning.effort`` to the console upstream when the
+                    caller doesn't specify ``reasoning_effort`` themselves.
+                    Use ``"high"`` for hybrid reasoning models the user
+                    expects to "think hard by default" (grok-4, grok-4.3,
+                    grok-4.20). Leave empty for models that either don't
+                    support the effort field (grok-4.20-reasoning is fixed
+                    intensity; the upstream rejects effort with HTTP 400)
+                    or don't reason at all (grok-4.20-non-reasoning).
+                    Only consulted when ``console_model`` is set; ignored
+                    on the legacy grok.com path.
     """
 
     model_name: str
@@ -29,6 +46,8 @@ class ModelSpec:
     enabled: bool
     public_name: str
     prefer_best: bool = False
+    console_model: str = ""
+    default_reasoning_effort: str = ""
 
     # --- convenience predicates ---
 
@@ -47,6 +66,10 @@ def is_video(self) -> bool:
     def is_voice(self) -> bool:
         return bool(self.capability & Capability.VOICE)
 
+    def is_console(self) -> bool:
+        """Return True if this model routes through console.x.ai."""
+        return bool(self.console_model)
+
     def pool_name(self) -> str:
         """Return the canonical pool string for this tier."""
         if self.tier == Tier.SUPER:
@@ -80,15 +103,15 @@ def pool_candidates(self) -> tuple[int, ...]:
         """
         if self.prefer_best:
             if self.tier == Tier.HEAVY:
-                return (2,)  # heavy only
+                return (2, )  # heavy only
             if self.tier == Tier.SUPER:
                 return (2, 1)  # heavy, super
             return (2, 1, 0)  # heavy, super, basic
         if self.tier == Tier.BASIC:
             return (0, 1, 2)  # basic, super, heavy
         if self.tier == Tier.SUPER:
             return (1, 2)  # super, heavy
-        return (2,)  # heavy only
+        return (2, )  # heavy only
 
 
 __all__ = ["ModelSpec"]
diff --git a/app/dataplane/reverse/planner.py b/app/dataplane/reverse/planner.py
@@ -8,27 +8,41 @@
 
 from app.control.model.spec import ModelSpec
 from app.dataplane.reverse.runtime.endpoint_table import (
-    CHAT, MEDIA_POST, WS_IMAGINE,
+    CHAT,
+    CONSOLE_RESPONSES,
+    MEDIA_POST,
+    WS_IMAGINE,
 )
 from .types import ReversePlan, TransportKind
 
-
 # ---------------------------------------------------------------------------
 # Profile defaults (timeout / content-type per transport)
 # ---------------------------------------------------------------------------
 
 _DEFAULTS: dict[TransportKind, dict[str, Any]] = {
-    TransportKind.HTTP_SSE:  {"timeout_s": 120.0, "content_type": "application/json"},
-    TransportKind.HTTP_JSON: {"timeout_s": 30.0,  "content_type": "application/json"},
-    TransportKind.WEBSOCKET: {"timeout_s": 300.0, "content_type": "application/json"},
-    TransportKind.GRPC_WEB:  {"timeout_s": 15.0,  "content_type": "application/grpc-web+proto"},
+    TransportKind.HTTP_SSE: {
+        "timeout_s": 120.0,
+        "content_type": "application/json"
+    },
+    TransportKind.HTTP_JSON: {
+        "timeout_s": 30.0,
+        "content_type": "application/json"
+    },
+    TransportKind.WEBSOCKET: {
+        "timeout_s": 300.0,
+        "content_type": "application/json"
+    },
+    TransportKind.GRPC_WEB: {
+        "timeout_s": 15.0,
+        "content_type": "application/grpc-web+proto"
+    },
 }
 
-
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
 
+
 def build_plan(spec: ModelSpec, request: dict[str, Any] | None = None) -> ReversePlan:
     """Produce a ReversePlan for the given model spec.
 
@@ -39,25 +53,34 @@ def build_plan(spec: ModelSpec, request: dict[str, Any] | None = None) -> Revers
     defaults = _DEFAULTS.get(tkind, _DEFAULTS[TransportKind.HTTP_JSON])
 
     return ReversePlan(
-        endpoint        = endpoint,
-        transport_kind  = tkind,
-        pool_candidates = spec.pool_candidates(),
-        mode_id         = int(spec.mode_id),
-        timeout_s       = defaults["timeout_s"],
-        content_type    = defaults["content_type"],
+        endpoint=endpoint,
+        transport_kind=tkind,
+        pool_candidates=spec.pool_candidates(),
+        mode_id=int(spec.mode_id),
+        timeout_s=defaults["timeout_s"],
+        content_type=defaults["content_type"],
     )
 
 
 # ---------------------------------------------------------------------------
 # Internal routing logic
 # ---------------------------------------------------------------------------
 
+
 def _resolve_endpoint(
     spec: ModelSpec,
     request: dict[str, Any],
 ) -> tuple[str, TransportKind]:
     """Determine (endpoint_url, transport_kind) for the given capability."""
 
+    # Console models route through console.x.ai/v1/responses (OpenAI Responses API)
+    if spec.is_console() and spec.is_chat():
+        # When stream=true the response is SSE; otherwise plain JSON.
+        # Always plan with the HTTP_SSE profile: it has the same content_type
+        # as HTTP_JSON (application/json) but a longer timeout, which suits
+        # long-running responses.
+        return CONSOLE_RESPONSES, TransportKind.HTTP_SSE
+
     if spec.is_chat():
         return CHAT, TransportKind.HTTP_SSE