From 7b4facbcf811be68d25515a1376aee694448b4ec Mon Sep 17 00:00:00 2001 From: xyz <2523269+antojoseph@users.noreply.github.com> Date: Thu, 30 Apr 2026 17:46:02 -0400 Subject: [PATCH 1/2] security: call block_dangerous_modules during engine load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit block_dangerous_modules was defined but never called in the production load path — only in tests. socket, subprocess, ctypes and multiprocessing were importable by provider-controlled vllm_mlx code despite the reported dangerous_modules_blocked capability being true. Call both lock_python_path and block_dangerous_modules at the top of load_vllm_mlx so both security layers are active in the same GIL scope that runs the model load. --- provider/src/inference.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/provider/src/inference.rs b/provider/src/inference.rs index 187b64f7..77512673 100644 --- a/provider/src/inference.rs +++ b/provider/src/inference.rs @@ -331,6 +331,13 @@ for _name in ( /// OpenAI-compatible features (chat templates, tool calling, structured /// output) without starting an HTTP server. fn load_vllm_mlx(&self, py: Python<'_>) -> Result<()> { + // Enforce both security layers in the same GIL scope that runs vllm_mlx. + // lock_python_path was already called in detect_engine(), but re-running it + // here is safe (idempotent) and ensures the blocker is installed in the + // same interpreter state that will execute the model load. 
+ Self::lock_python_path(py)?; + Self::block_dangerous_modules(py)?; + let model = serde_json::to_string(&self.model_id).context("invalid model path")?; let cache_key = serde_json::to_string(&self.cache_key).context("invalid cache key")?; let code = format!( From 9e7af78c481bb4e309ab2766afd9bf9334e76509 Mon Sep 17 00:00:00 2001 From: xyz <2523269+antojoseph@users.noreply.github.com> Date: Sun, 3 May 2026 23:39:54 -0400 Subject: [PATCH 2/2] fix: call block_dangerous_modules after model load, not before _load_model may download weights from HuggingFace on cold starts, which requires socket and urllib. Blocking those modules before load caused first-run failures when the model was not pre-cached. sys.path is still locked before the load; the dangerous-module blocker is now installed after the engine is initialized and before inference requests are served. --- provider/src/inference.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/provider/src/inference.rs b/provider/src/inference.rs index 77512673..2a27b734 100644 --- a/provider/src/inference.rs +++ b/provider/src/inference.rs @@ -331,12 +331,12 @@ for _name in ( /// OpenAI-compatible features (chat templates, tool calling, structured /// output) without starting an HTTP server. fn load_vllm_mlx(&self, py: Python<'_>) -> Result<()> { - // Enforce both security layers in the same GIL scope that runs vllm_mlx. - // lock_python_path was already called in detect_engine(), but re-running it - // here is safe (idempotent) and ensures the blocker is installed in the - // same interpreter state that will execute the model load. + // Lock sys.path before the model load so no extra packages can be injected. + // block_dangerous_modules is intentionally called AFTER _load_model completes: + // _load_model() may download weights from HuggingFace on a cold start, which + // requires socket/urllib. 
The blocker is installed once the engine is cached + // and before the process begins serving inference requests. Self::lock_python_path(py)?; - Self::block_dangerous_modules(py)?; let model = serde_json::to_string(&self.model_id).context("invalid model path")?; let cache_key = serde_json::to_string(&self.cache_key).context("invalid cache key")?; @@ -364,6 +364,10 @@ except Exception as _e: let ccode = CString::new(code).context("invalid code string")?; py.run(ccode.as_c_str(), None, None) .context("failed to initialize vllm-mlx engine via server handler")?; + + // Block dangerous modules now that the model is loaded. Any attempt by + // inference-time Python code to import socket, subprocess, etc. will fail. + Self::block_dangerous_modules(py)?; Ok(()) }