From 7b4facbcf811be68d25515a1376aee694448b4ec Mon Sep 17 00:00:00 2001
From: xyz <2523269+antojoseph@users.noreply.github.com>
Date: Thu, 30 Apr 2026 17:46:02 -0400
Subject: [PATCH 1/2] security: call block_dangerous_modules during engine load
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

block_dangerous_modules was defined but never called in the production
load path — only in tests. As a result, socket, subprocess, ctypes and
multiprocessing were importable by provider-controlled vllm_mlx code even
though the reported dangerous_modules_blocked capability was true.

Call both lock_python_path and block_dangerous_modules at the top of
load_vllm_mlx so both security layers are active in the same GIL scope
that runs the model load.
---
 provider/src/inference.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/provider/src/inference.rs b/provider/src/inference.rs
index 187b64f7..77512673 100644
--- a/provider/src/inference.rs
+++ b/provider/src/inference.rs
@@ -331,6 +331,13 @@ for _name in (
     /// OpenAI-compatible features (chat templates, tool calling, structured
     /// output) without starting an HTTP server.
     fn load_vllm_mlx(&self, py: Python<'_>) -> Result<()> {
+        // Enforce both security layers in the same GIL scope that runs vllm_mlx.
+        // lock_python_path was already called in detect_engine(), but re-running it
+        // here is safe (idempotent) and ensures the blocker is installed in the
+        // same interpreter state that will execute the model load.
+        Self::lock_python_path(py)?;
+        Self::block_dangerous_modules(py)?;
+
         let model = serde_json::to_string(&self.model_id).context("invalid model path")?;
         let cache_key = serde_json::to_string(&self.cache_key).context("invalid cache key")?;
         let code = format!(

From 9e7af78c481bb4e309ab2766afd9bf9334e76509 Mon Sep 17 00:00:00 2001
From: xyz <2523269+antojoseph@users.noreply.github.com>
Date: Sun, 3 May 2026 23:39:54 -0400
Subject: [PATCH 2/2] fix: call block_dangerous_modules after model load, not
 before

_load_model may download weights from HuggingFace on cold starts, which
requires socket and urllib. Blocking those modules before load caused
first-run failures when the model was not pre-cached.

sys.path is still locked before the load; the dangerous-module blocker
is now installed after the engine is initialized and before inference
requests are served.
---
 provider/src/inference.rs | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/provider/src/inference.rs b/provider/src/inference.rs
index 77512673..2a27b734 100644
--- a/provider/src/inference.rs
+++ b/provider/src/inference.rs
@@ -331,12 +331,12 @@ for _name in (
     /// OpenAI-compatible features (chat templates, tool calling, structured
     /// output) without starting an HTTP server.
     fn load_vllm_mlx(&self, py: Python<'_>) -> Result<()> {
-        // Enforce both security layers in the same GIL scope that runs vllm_mlx.
-        // lock_python_path was already called in detect_engine(), but re-running it
-        // here is safe (idempotent) and ensures the blocker is installed in the
-        // same interpreter state that will execute the model load.
+        // Lock sys.path before the model load so no extra packages can be injected.
+        // block_dangerous_modules is intentionally called AFTER _load_model completes:
+        // _load_model may download weights from HuggingFace on a cold start, which
+        // requires socket/urllib. The blocker is installed once the engine is cached
+        // and before the process begins serving inference requests.
         Self::lock_python_path(py)?;
-        Self::block_dangerous_modules(py)?;
 
         let model = serde_json::to_string(&self.model_id).context("invalid model path")?;
         let cache_key = serde_json::to_string(&self.cache_key).context("invalid cache key")?;
@@ -364,6 +364,10 @@ except Exception as _e:
         let ccode = CString::new(code).context("invalid code string")?;
         py.run(ccode.as_c_str(), None, None)
             .context("failed to initialize vllm-mlx engine via server handler")?;
+
+        // Block dangerous modules now that the model is loaded, so inference-time Python code
+        // cannot import socket, subprocess, etc. NOTE(review): confirm cached imports are covered.
+        Self::block_dangerous_modules(py)?;
         Ok(())
     }