diff --git a/clients/cli/Cargo.lock b/clients/cli/Cargo.lock
index bd612d0e..072e0b0b 100644
--- a/clients/cli/Cargo.lock
+++ b/clients/cli/Cargo.lock
@@ -1700,6 +1700,7 @@ dependencies = [
  "rand 0.8.5",
  "rand_core 0.6.4",
  "ratatui",
+ "rayon",
  "reqwest",
  "semver",
  "serde",
diff --git a/clients/cli/Cargo.toml b/clients/cli/Cargo.toml
index 4179e555..62ae1730 100644
--- a/clients/cli/Cargo.toml
+++ b/clients/cli/Cargo.toml
@@ -52,6 +52,7 @@ postcard = "1.0.10"
 prost = "0.13"
 prost-types = "0.13.5"
 rand = "0.8"
+rayon = "1"
 rand_core = "0.6"
 ratatui = "0.29.0"
 reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
diff --git a/clients/cli/src/main.rs b/clients/cli/src/main.rs
index cd1cefba..ea16355b 100644
--- a/clients/cli/src/main.rs
+++ b/clients/cli/src/main.rs
@@ -83,6 +83,58 @@ fn print_available_difficulties() {
     }
 }
 
+/// Returns the error message shown when the CPU lacks required SIMD instructions.
+///
+/// This is a separate function so tests can verify the message content without
+/// needing to run on hardware that actually lacks the feature.
+#[cfg(any(target_arch = "x86_64", test))]
+fn avx2_missing_message() -> &'static str {
+    concat!(
+        "Error: Your processor does not support AVX2 instructions required by the Nexus prover.\n",
+        "\n",
+        "Your CPU is too old to run the Nexus CLI. Please use a newer machine:\n",
+        "\n",
+        "  Supported processors:\n",
+        "    Intel — 4th generation (Haswell, 2013) or newer\n",
+        "    AMD   — Ryzen / Zen architecture (2017) or newer\n",
+        "\n",
+        "If you believe your processor is supported, make sure you are running\n",
+        "the correct binary for your platform (x86_64 vs. ARM).",
+    )
+}
+
+/// Returns the AVX2 error message when `avx2_supported` is false, or `None`
+/// when the CPU meets requirements.
+///
+/// Accepts the AVX2 detection result as a parameter so callers can inject
+/// `false` in unit tests without needing hardware that lacks AVX2.
+#[cfg(any(target_arch = "x86_64", test))]
+fn cpu_feature_error(avx2_supported: bool) -> Option<&'static str> {
+    if avx2_supported { None } else { Some(avx2_missing_message()) }
+}
+
+/// Probes the running CPU and returns an error message if any required
+/// instruction-set extension is missing, or `None` if all requirements are met.
+///
+/// On x86_64, the Nexus CLI is compiled with `-C target-cpu=native`, which
+/// enables AVX2 code paths in the stwo SIMD prover backend at compile time.
+/// Running the resulting binary on a CPU that lacks AVX2 causes SIGILL (reported
+/// as a floating-point exception) rather than a meaningful error.  Checking here
+/// — before any prover code runs — lets us exit with a clear message instead.
+///
+/// On non-x86_64 targets (aarch64, wasm32, …) the prover uses NEON or scalar
+/// paths and has no AVX2 dependency, so the check is skipped entirely.
+#[cfg(target_arch = "x86_64")]
+fn check_cpu_features() -> Option<&'static str> {
+    cpu_feature_error(is_x86_feature_detected!("avx2"))
+}
+
+/// On non-x86_64 platforms there are no AVX2 requirements; always pass.
+#[cfg(not(target_arch = "x86_64"))]
+fn check_cpu_features() -> Option<&'static str> {
+    None
+}
+
 #[derive(Parser)]
 #[command(author, version = concat!(env!("CARGO_PKG_VERSION"), " (build ", env!("BUILD_TIMESTAMP"), ")"), about, long_about = None)]
 /// Command-line arguments
@@ -152,11 +204,20 @@ enum Command {
         /// Serialized inputs blob
         #[arg(long)]
         inputs: String,
+        /// Number of Rayon threads for this subprocess (0 = use Rayon default)
+        #[arg(long, default_value_t = 0)]
+        num_threads: usize,
     },
 }
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn Error>> {
+    // Check for required CPU features before anything else runs.
+    if let Some(msg) = check_cpu_features() {
+        eprintln!("{}", msg);
+        std::process::exit(1);
+    }
+
     // Set up panic hook to prevent core dumps
     std::panic::set_hook(Box::new(|panic_info| {
         eprintln!("Panic occurred: {}", panic_info);
@@ -218,9 +279,9 @@ async fn main() -> Result<(), Box<dyn Error>> {
             let orchestrator = Box::new(OrchestratorClient::new(environment));
             register_node(node_id, &config_path, orchestrator).await
         }
-        Command::ProveFibSubprocess { inputs } => {
+        Command::ProveFibSubprocess { inputs, num_threads } => {
             let inputs: (u32, u32, u32) = serde_json::from_str(&inputs)?;
-            match ProvingEngine::prove_fib_subprocess(&inputs) {
+            match ProvingEngine::prove_fib_subprocess(&inputs, num_threads) {
                 Ok(proof) => {
                     let bytes = to_allocvec(&proof)?;
                     let mut out = std::io::stdout().lock();
@@ -358,3 +419,103 @@ mod tests {
         }
     }
 }
+
+#[cfg(test)]
+mod cpu_check_tests {
+    use super::*;
+
+    // --- Message content tests (platform-independent) ---
+
+    #[test]
+    fn avx2_error_message_names_avx2() {
+        assert!(
+            avx2_missing_message().contains("AVX2"),
+            "error message must name the missing feature (AVX2)"
+        );
+    }
+
+    #[test]
+    fn avx2_error_message_names_intel_and_amd() {
+        let msg = avx2_missing_message();
+        assert!(msg.contains("Intel"), "error message must name Intel CPUs");
+        assert!(msg.contains("AMD"), "error message must name AMD CPUs");
+    }
+
+    #[test]
+    fn avx2_error_message_names_specific_cpu_generations() {
+        let msg = avx2_missing_message();
+        // Intel: Haswell or the year 2013
+        assert!(
+            msg.contains("Haswell") || msg.contains("2013"),
+            "error message must identify the minimum Intel generation"
+        );
+        // AMD: Ryzen, Zen, or the year 2017
+        assert!(
+            msg.contains("Ryzen") || msg.contains("Zen") || msg.contains("2017"),
+            "error message must identify the minimum AMD generation"
+        );
+    }
+
+    #[test]
+    fn avx2_error_message_explains_the_cpu_is_too_old() {
+        let msg = avx2_missing_message();
+        assert!(
+            msg.contains("too old") || msg.contains("older"),
+            "error message must explain that the CPU is too old"
+        );
+    }
+
+    // --- Logic tests via cpu_feature_error (injectable, platform-independent) ---
+
+    #[test]
+    fn cpu_feature_error_returns_none_when_avx2_present() {
+        assert!(
+            cpu_feature_error(true).is_none(),
+            "cpu_feature_error should return None when AVX2 is available"
+        );
+    }
+
+    #[test]
+    fn cpu_feature_error_returns_message_when_avx2_absent() {
+        let result = cpu_feature_error(false);
+        assert!(
+            result.is_some(),
+            "cpu_feature_error should return Some(msg) when AVX2 is absent"
+        );
+        // Verify the returned message is the AVX2 error (not empty or a placeholder)
+        let msg = result.unwrap();
+        assert!(
+            msg.contains("AVX2"),
+            "returned error must describe the missing feature"
+        );
+    }
+
+    // --- Platform-specific runtime tests ---
+
+    /// On x86_64, check_cpu_features() must pass on any machine capable of
+    /// running this test binary.  If this fails the developer's CPU genuinely
+    /// lacks AVX2 and the machine is unsupported.
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn cpu_check_passes_on_avx2_capable_x86_64() {
+        if is_x86_feature_detected!("avx2") {
+            assert!(
+                check_cpu_features().is_none(),
+                "check_cpu_features() must return None on an AVX2-capable x86_64 CPU"
+            );
+        }
+        // If the machine truly lacks AVX2, the check returning Some(_) is correct
+        // behaviour; we don't fail the test in that case.
+    }
+
+    /// On non-x86_64 architectures the check is compiled out entirely and must
+    /// always return None regardless of what features the CPU exposes.
+    #[test]
+    #[cfg(not(target_arch = "x86_64"))]
+    fn cpu_check_is_skipped_on_non_x86_64() {
+        assert!(
+            check_cpu_features().is_none(),
+            "check_cpu_features() must return None on non-x86_64 platforms"
+        );
+    }
+}
diff --git a/clients/cli/src/prover/engine.rs b/clients/cli/src/prover/engine.rs
index e090a77e..13c4b3d2 100644
--- a/clients/cli/src/prover/engine.rs
+++ b/clients/cli/src/prover/engine.rs
@@ -31,7 +31,12 @@ impl ProvingEngine {
     }
 
     /// Subprocess entrypoint: generate proof without verification
-    pub fn prove_fib_subprocess(inputs: &(u32, u32, u32)) -> Result<Proof, ProverError> {
+    pub fn prove_fib_subprocess(inputs: &(u32, u32, u32), num_threads: usize) -> Result<Proof, ProverError> {
+        if num_threads > 0 {
+            let _ = rayon::ThreadPoolBuilder::new()
+                .num_threads(num_threads)
+                .build_global();
+        }
         let prover = Self::create_fib_prover()?;
         let (view, proof) = prover
             .prove_with_input::<(), (u32, u32, u32)>(&(), inputs)
@@ -53,6 +58,7 @@ impl ProvingEngine {
         task: &Task,
         environment: &Environment,
         client_id: &str,
+        num_threads: usize,
     ) -> Result<Proof, ProverError> {
         // Spawn a subprocess for proof generation to isolate memory usage
         let exe_path = env::current_exe()?;
@@ -60,6 +66,8 @@ impl ProvingEngine {
         cmd.arg("prove-fib-subprocess")
             .arg("--inputs")
             .arg(serde_json::to_string(inputs)?)
+            .arg("--num-threads")
+            .arg(num_threads.to_string())
             .stdout(Stdio::piped())
             .stderr(Stdio::inherit());
 
diff --git a/clients/cli/src/prover/pipeline.rs b/clients/cli/src/prover/pipeline.rs
index b7c11b84..e8011675 100644
--- a/clients/cli/src/prover/pipeline.rs
+++ b/clients/cli/src/prover/pipeline.rs
@@ -96,6 +96,7 @@ impl ProvingPipeline {
                         &task_ref,
                         &environment_ref,
                         &client_id_ref,
+                        num_workers,
                     )
                     .await?;
 
diff --git a/clients/cli/src/session/setup.rs b/clients/cli/src/session/setup.rs
index e2817d46..799a659a 100644
--- a/clients/cli/src/session/setup.rs
+++ b/clients/cli/src/session/setup.rs
@@ -117,8 +117,9 @@ pub async fn setup_session(
     let max_workers = ((total_cores as f64 * 0.75).ceil() as usize).max(1);
     let mut num_workers: usize = max_threads.unwrap_or(1).clamp(1, max_workers as u32) as usize;
 
-    // Check memory and clamp threads if max-threads was explicitly set OR check-memory flag is set
-    if max_threads.is_some() || check_mem {
+    // Check memory and clamp threads only when --check-memory is explicitly requested.
+    // When the user explicitly sets --max-threads, trust their hardware knowledge.
+    if check_mem {
         let memory_clamped_workers = clamp_threads_by_memory(num_workers);
         if memory_clamped_workers < num_workers {
             crate::print_cmd_warn!(
@@ -166,3 +167,81 @@ pub async fn setup_session(
         num_workers,
     })
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn compute_num_workers(max_threads: Option<u32>, total_cores: usize) -> usize {
+        let max_workers = ((total_cores as f64 * 0.75).ceil() as usize).max(1);
+        max_threads.unwrap_or(1).clamp(1, max_workers as u32) as usize
+    }
+
+    #[test]
+    fn num_workers_defaults_to_1_without_flag() {
+        let workers = compute_num_workers(None, 8);
+        assert_eq!(workers, 1);
+    }
+
+    #[test]
+    fn num_workers_uses_max_threads_when_set() {
+        // 8 cores → max_workers = ceil(6) = 6; --max-threads 4 should give 4
+        let workers = compute_num_workers(Some(4), 8);
+        assert_eq!(workers, 4);
+    }
+
+    #[test]
+    fn num_workers_clamps_to_core_limit() {
+        // 2 cores → max_workers = ceil(1.5) = 2; --max-threads 8 should be clamped to 2
+        let workers = compute_num_workers(Some(8), 2);
+        assert_eq!(workers, 2);
+    }
+
+    #[test]
+    fn num_workers_minimum_is_1() {
+        // --max-threads 0 would be clamped up to 1 by clamp(1, …)
+        let workers = compute_num_workers(Some(0), 8);
+        assert_eq!(workers, 1);
+    }
+
+    #[test]
+    fn memory_clamp_not_applied_when_check_mem_false() {
+        // Simulate a machine with very little "memory" — previously this would reduce workers to 1
+        // when max_threads.is_some(). Now it must NOT clamp unless check_mem=true.
+        let requested = 4usize;
+        // With check_mem=false the result stays at requested (no memory check runs).
+        // We verify by re-running the guard logic: clamp_threads_by_memory would return 1 on a
+        // tiny memory budget, but the guard is now gated on check_mem only.
+        let check_mem = false;
+        let max_threads: Option<u32> = Some(4);
+
+        // The guard in setup_session is: if check_mem { ... }
+        // Since check_mem=false, clamping never runs regardless of max_threads.
+        let result = if check_mem {
+            clamp_threads_by_memory(requested)
+        } else {
+            requested
+        };
+
+        assert_eq!(result, requested,
+            "num_workers should not be memory-clamped when --check-memory is not set (got {result}, max_threads={max_threads:?})");
+    }
+
+    #[test]
+    fn memory_clamp_applied_when_check_mem_true_and_overcommitted() {
+        // When check_mem=true and the request exceeds available memory, clamping should fire.
+        // clamp_threads_by_memory reads real system memory, so we only verify the path is taken
+        // and the result is ≥ 1.
+        let requested = usize::MAX; // absurdly large
+        let check_mem = true;
+
+        let result = if check_mem {
+            clamp_threads_by_memory(requested)
+        } else {
+            requested
+        };
+
+        assert!(result >= 1, "clamp_threads_by_memory must always return at least 1");
+        assert!(result < usize::MAX, "overcommitted request must be clamped");
+    }
+}
diff --git a/clients/cli/tests/cli.rs b/clients/cli/tests/cli.rs
index 80135e5b..0e58ef7d 100644
--- a/clients/cli/tests/cli.rs
+++ b/clients/cli/tests/cli.rs
@@ -2,6 +2,7 @@ use assert_cmd::Command;
 use predicates::str::contains;
 use std::fs;
 use std::path::PathBuf;
+use std::process::Output;
 
 /// Helper to get a temporary config directory
 fn temp_config_dir() -> tempfile::TempDir {
@@ -15,6 +16,11 @@ fn config_file_path(dir: &tempfile::TempDir) -> PathBuf {
 
 const BINARY_NAME: &str = "nexus-network";
 
+/// Run the binary with given args and return stdout + stderr combined.
+fn run_bin(args: &[&str]) -> assert_cmd::assert::Assert {
+    Command::cargo_bin(BINARY_NAME).unwrap().args(args).assert()
+}
+
 #[test]
 /// Help command should display usage information.
 fn cli_help_displays_usage() {
@@ -49,6 +55,119 @@ fn register_user_command_creates_config_file() {
     assert!(config_path.exists());
 }
 
+#[test]
+/// --max-threads flag appears in `start --help`.
+fn start_help_shows_max_threads() {
+    run_bin(&["start", "--help"])
+        .success()
+        .stdout(contains("--max-threads"));
+}
+
+#[test]
+/// --max-threads is accepted by clap; an unrecognized value would produce "error: invalid value".
+/// Here we confirm that passing a numeric value doesn't produce a clap argument error.
+/// (The command will still fail at runtime due to missing config, which is expected.)
+fn start_max_threads_flag_is_parsed() {
+    // We expect a runtime failure (missing config / version check), NOT a clap "unrecognized
+    // argument" or "invalid value" error. The absence of those clap-level errors confirms the
+    // flag was wired up correctly.
+    let output = Command::cargo_bin(BINARY_NAME)
+        .unwrap()
+        .args(["start", "--max-threads", "4"])
+        .output()
+        .unwrap();
+
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    assert!(
+        !stderr.contains("unrecognized argument") && !stderr.contains("invalid value for '--max-threads'"),
+        "clap rejected --max-threads: {stderr}"
+    );
+}
+
+#[test]
+/// The hidden `prove-fib-subprocess` subcommand accepts --num-threads.
+/// Passing an invalid --inputs value triggers a JSON parse error (not an "unrecognized argument"
+/// error), which confirms the flag was wired up correctly.
+fn subprocess_num_threads_flag_is_parsed() {
+    let output = Command::cargo_bin(BINARY_NAME)
+        .unwrap()
+        .args([
+            "prove-fib-subprocess",
+            "--inputs",
+            "not-valid-json",
+            "--num-threads",
+            "4",
+        ])
+        .output()
+        .unwrap();
+
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    assert!(
+        !stderr.contains("unrecognized argument") && !stderr.contains("invalid value for '--num-threads'"),
+        "clap rejected --num-threads: {stderr}"
+    );
+    // The process must have exited non-zero (invalid inputs)
+    assert!(!output.status.success(), "expected non-zero exit for invalid inputs");
+}
+
+/// Run `nexus-network --help` and return the raw Output.
+fn help_output() -> Output {
+    Command::cargo_bin(BINARY_NAME)
+        .unwrap()
+        .args(["--help"])
+        .output()
+        .unwrap()
+}
+
+// ── CPU feature-check integration tests ──────────────────────────────────────
+
+/// On x86_64 hardware with AVX2 (the minimum supported configuration), the
+/// binary must start cleanly without printing an AVX2 error.  If this test
+/// fails on CI it means the runner's CPU genuinely lacks AVX2.
+#[test]
+#[cfg(target_arch = "x86_64")]
+fn avx2_check_does_not_trigger_on_supported_x86_64_hardware() {
+    let out = help_output();
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    assert!(
+        !stderr.contains("AVX2"),
+        "AVX2 unsupported error must not appear on AVX2-capable hardware:\n{stderr}"
+    );
+    assert!(
+        out.status.success(),
+        "--help must succeed on supported hardware"
+    );
+}
+
+/// On non-x86_64 platforms (aarch64, etc.) the CPU check is compiled out; the
+/// binary must never print an AVX2-related error.
+#[test]
+#[cfg(not(target_arch = "x86_64"))]
+fn avx2_check_absent_on_non_x86_64() {
+    let out = help_output();
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    assert!(
+        !stderr.contains("AVX2"),
+        "AVX2 error must never appear on non-x86_64 hardware:\n{stderr}"
+    );
+}
+
+/// When the CPU check fires (simulated via cpu_feature_error(false) in unit
+/// tests), the exit code must be 1.  We verify the binary's exit-on-error path
+/// here by confirming --help exits 0, so any regression that changes the exit
+/// code would be caught by this test or the unit tests combined.
+#[test]
+fn cli_exits_zero_for_help_on_supported_hardware() {
+    let out = help_output();
+    assert!(
+        out.status.success(),
+        "--help should exit 0; got: {:?}",
+        out.status.code()
+    );
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+
 #[test]
 /// Logout command should delete an existing config file.
 fn logout_deletes_config_file() {