# Profile name of the model currently being served. Starts as None and is
# assigned when a server is auto-spawned and again after a hot-swap.
3434_active_model : str | None = None
# NOTE(review): presumably serializes hot-swap operations; the acquisition
# sites are not visible in this section — confirm against its users.
3535_swap_lock = threading .Lock ()
3636
# File holding the name of the last swapped-in model so the selection can be
# read back after a process restart. The path is relative, so it resolves
# against the current working directory at the time of each read/write.
37+ _MODEL_STATE_FILE = Path ("data" ) / "util" / "active_llm_model.txt"
38+
39+
def _read_persisted_model() -> str | None:
    """Return the model name saved by the last swap, or ``None``.

    Reads ``_MODEL_STATE_FILE`` as UTF-8 and strips surrounding whitespace.

    Returns:
        The stored model name, or ``None`` when the file is missing,
        unreadable, not valid UTF-8, or empty/whitespace-only. Callers use
        ``None`` as the signal to fall back to their configured profile.
    """
    try:
        # Explicit encoding: the default depends on the platform locale, which
        # would make the persisted name non-portable across environments.
        text = _MODEL_STATE_FILE.read_text(encoding="utf-8").strip()
    except (OSError, ValueError):
        # OSError: file absent or unreadable. ValueError: covers
        # UnicodeDecodeError for a corrupt file. Either way there is no usable
        # persisted value, so report "nothing persisted" instead of raising.
        return None
    return text or None
47+
48+
def _write_persisted_model(profile: str | None) -> None:
    """Save the active model name to ``_MODEL_STATE_FILE`` (best effort).

    Creates the parent directory if needed and overwrites the file with
    ``profile`` encoded as UTF-8.

    Args:
        profile: Model name to persist. ``None`` (or an empty string) is
            written as an empty file, i.e. "no model persisted".

    This function never raises: persisting the selection is a convenience and
    must not abort the caller. Failures are logged at warning level so a
    selection that does not survive a restart can be diagnosed.
    """
    try:
        _MODEL_STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding keeps the file independent of the platform locale.
        _MODEL_STATE_FILE.write_text(profile or "", encoding="utf-8")
    except Exception:  # noqa: BLE001 - deliberate best-effort boundary
        # Imported locally so this block does not rely on a module-level
        # logging import being present.
        import logging

        logging.getLogger(__name__).warning(
            "Could not persist active model to %s",
            _MODEL_STATE_FILE,
            exc_info=True,
        )
56+
57+
3758# ─── env var plumbing ─────────────────────────────────────────────────────
3859
# NOTE(review): presumably the default cap on simultaneously running backends;
# its users are not visible in this section — confirm before relying on it.
3960_DEFAULT_MAX_BACKENDS = 2
@@ -702,11 +723,19 @@ def _maybe_auto_spawn_server(
702723 )
703724 return lane_configs
704725
726+ # Check for a persisted model from a previous `maxim peer llm` swap.
727+ # This survives os.execv restarts so the leader comes back with the
728+ # same model the user selected, not the default profile.
729+ persisted = _read_persisted_model ()
730+ effective_profile = persisted or infer_cfg .model_profile
731+ if persisted and logger is not None :
732+ logger .info ("Auto-spawn: using persisted model '%s' from previous swap" , persisted )
733+
705734 # Resolve the profile's GGUF path
706735 try :
707736 from maxim .models .language .config import load_llm_config
708737
709- profile_cfg = load_llm_config (profile_override = infer_cfg . model_profile )
738+ profile_cfg = load_llm_config (profile_override = effective_profile )
710739 model_path = getattr (profile_cfg , "model_path" , "" )
711740 except Exception :
712741 return lane_configs
@@ -811,7 +840,7 @@ class _Fallback:
811840 # Track active spawner for hot-swap via `maxim peer llm <model>`
812841 global _active_spawner , _active_model # noqa: PLW0603
813842 _active_spawner = spawner
814- _active_model = infer_cfg . model_profile
843+ _active_model = effective_profile
815844
816845 # Rewrite the infer lane to point at the spawned server. Auto-wire the
817846 # API key for the leader's own client so local inference doesn't 401.
@@ -820,7 +849,7 @@ class _Fallback:
820849 out ["infer" ] = dataclasses .replace (
821850 infer_cfg ,
822851 remote_url = url ,
823- remote_model = infer_cfg . model_profile ,
852+ remote_model = effective_profile ,
824853 remote_api_key = infer_api_key ,
825854 )
826855
@@ -981,6 +1010,7 @@ def swap_llm_server(profile: str, logger: Any | None = None) -> dict[str, Any]:
9811010
9821011 _active_spawner = spawner
9831012 _active_model = resolved
1013+ _write_persisted_model (resolved )
9841014
9851015 return {
9861016 "status" : "swapped" ,
0 commit comments