Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions provider/src/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,13 @@ for _name in (
/// OpenAI-compatible features (chat templates, tool calling, structured
/// output) without starting an HTTP server.
fn load_vllm_mlx(&self, py: Python<'_>) -> Result<()> {
// Lock sys.path before the model load so no extra packages can be injected.
// `block_dangerous_modules` is intentionally called AFTER `_load_model` completes:
// `load_model()` may download weights from Hugging Face on a cold start, which
// requires network modules (socket/urllib) that the blocker would otherwise
// reject. The blocker is installed once the engine is cached and before the
// process begins serving inference requests.
Self::lock_python_path(py)?;

let model = serde_json::to_string(&self.model_id).context("invalid model path")?;
let cache_key = serde_json::to_string(&self.cache_key).context("invalid cache key")?;
let code = format!(
Expand All @@ -357,6 +364,10 @@ except Exception as _e:
let ccode = CString::new(code).context("invalid code string")?;
py.run(ccode.as_c_str(), None, None)
.context("failed to initialize vllm-mlx engine via server handler")?;

// Block dangerous modules now that the model is loaded. Any attempt by
// inference-time Python code to import socket, subprocess, etc. will fail.
Self::block_dangerous_modules(py)?;
Ok(())
}

Expand Down
Loading