Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clients/cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions clients/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ postcard = "1.0.10"
prost = "0.13"
prost-types = "0.13.5"
rand = "0.8"
rayon = "1"
rand_core = "0.6"
ratatui = "0.29.0"
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
Expand Down
165 changes: 163 additions & 2 deletions clients/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,58 @@ fn print_available_difficulties() {
}
}

/// Returns the error message shown when the CPU lacks required SIMD instructions.
///
/// This is a separate function so tests can verify the message content without
/// needing to run on hardware that actually lacks the feature.
#[cfg(any(target_arch = "x86_64", test))]
fn avx2_missing_message() -> &'static str {
concat!(
"Error: Your processor does not support AVX2 instructions required by the Nexus prover.\n",
"\n",
"Your CPU is too old to run the Nexus CLI. Please use a newer machine:\n",
"\n",
" Supported processors:\n",
" Intel — 4th generation (Haswell, 2013) or newer\n",
" AMD — Ryzen / Zen architecture (2017) or newer\n",
"\n",
"If you believe your processor is supported, make sure you are running\n",
"the correct binary for your platform (x86_64 vs. ARM).",
)
}

/// Returns the AVX2 error message when `avx2_supported` is false, or `None`
/// when the CPU meets requirements.
///
/// Accepts the AVX2 detection result as a parameter so callers can inject
/// `false` in unit tests without needing hardware that lacks AVX2.
#[cfg(any(target_arch = "x86_64", test))]
fn cpu_feature_error(avx2_supported: bool) -> Option<&'static str> {
if avx2_supported { None } else { Some(avx2_missing_message()) }
}

/// Probes the running CPU and returns an error message if any required
/// instruction-set extension is missing, or `None` if all requirements are met.
///
/// On x86_64, the Nexus CLI is compiled with `-C target-cpu=native`, which
/// enables AVX2 code paths in the stwo SIMD prover backend at compile time.
/// Running the resulting binary on a CPU that lacks AVX2 causes SIGILL (reported
/// as a floating-point exception) rather than a meaningful error. Checking here
/// — before any prover code runs — lets us exit with a clear message instead.
///
/// On non-x86_64 targets (aarch64, wasm32, …) the prover uses NEON or scalar
/// paths and has no AVX2 dependency, so the check is skipped entirely.
#[cfg(target_arch = "x86_64")]
fn check_cpu_features() -> Option<&'static str> {
cpu_feature_error(is_x86_feature_detected!("avx2"))
}

/// On non-x86_64 platforms there are no AVX2 requirements; always pass.
#[cfg(not(target_arch = "x86_64"))]
fn check_cpu_features() -> Option<&'static str> {
None
}

#[derive(Parser)]
#[command(author, version = concat!(env!("CARGO_PKG_VERSION"), " (build ", env!("BUILD_TIMESTAMP"), ")"), about, long_about = None)]
/// Command-line arguments
Expand Down Expand Up @@ -152,11 +204,20 @@ enum Command {
/// Serialized inputs blob
#[arg(long)]
inputs: String,
/// Number of Rayon threads for this subprocess (0 = use Rayon default)
#[arg(long, default_value_t = 0)]
num_threads: usize,
},
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
// Check for required CPU features before anything else runs.
if let Some(msg) = check_cpu_features() {
eprintln!("{}", msg);
std::process::exit(1);
}

// Set up panic hook to prevent core dumps
std::panic::set_hook(Box::new(|panic_info| {
eprintln!("Panic occurred: {}", panic_info);
Expand Down Expand Up @@ -218,9 +279,9 @@ async fn main() -> Result<(), Box<dyn Error>> {
let orchestrator = Box::new(OrchestratorClient::new(environment));
register_node(node_id, &config_path, orchestrator).await
}
Command::ProveFibSubprocess { inputs } => {
Command::ProveFibSubprocess { inputs, num_threads } => {
let inputs: (u32, u32, u32) = serde_json::from_str(&inputs)?;
match ProvingEngine::prove_fib_subprocess(&inputs) {
match ProvingEngine::prove_fib_subprocess(&inputs, num_threads) {
Ok(proof) => {
let bytes = to_allocvec(&proof)?;
let mut out = std::io::stdout().lock();
Expand Down Expand Up @@ -358,3 +419,103 @@ mod tests {
}
}
}

#[cfg(test)]
mod cpu_check_tests {
use super::*;

// --- Message content tests (platform-independent) ---

#[test]
fn avx2_error_message_names_avx2() {
assert!(
avx2_missing_message().contains("AVX2"),
"error message must name the missing feature (AVX2)"
);
}

#[test]
fn avx2_error_message_names_intel_and_amd() {
let msg = avx2_missing_message();
assert!(msg.contains("Intel"), "error message must name Intel CPUs");
assert!(msg.contains("AMD"), "error message must name AMD CPUs");
}

#[test]
fn avx2_error_message_names_specific_cpu_generations() {
let msg = avx2_missing_message();
// Intel: Haswell or the year 2013
assert!(
msg.contains("Haswell") || msg.contains("2013"),
"error message must identify the minimum Intel generation"
);
// AMD: Ryzen, Zen, or the year 2017
assert!(
msg.contains("Ryzen") || msg.contains("Zen") || msg.contains("2017"),
"error message must identify the minimum AMD generation"
);
}

#[test]
fn avx2_error_message_explains_the_cpu_is_too_old() {
let msg = avx2_missing_message();
assert!(
msg.contains("too old") || msg.contains("older"),
"error message must explain that the CPU is too old"
);
}

// --- Logic tests via cpu_feature_error (injectable, platform-independent) ---

#[test]
fn cpu_feature_error_returns_none_when_avx2_present() {
assert!(
cpu_feature_error(true).is_none(),
"cpu_feature_error should return None when AVX2 is available"
);
}

#[test]
fn cpu_feature_error_returns_message_when_avx2_absent() {
let result = cpu_feature_error(false);
assert!(
result.is_some(),
"cpu_feature_error should return Some(msg) when AVX2 is absent"
);
// Verify the returned message is the AVX2 error (not empty or a placeholder)
let msg = result.unwrap();
assert!(
msg.contains("AVX2"),
"returned error must describe the missing feature"
);
}

// --- Platform-specific runtime tests ---

/// On x86_64, check_cpu_features() must pass on any machine capable of
/// running this test binary. If this fails the developer's CPU genuinely
/// lacks AVX2 and the machine is unsupported.
#[test]
#[cfg(target_arch = "x86_64")]
fn cpu_check_passes_on_avx2_capable_x86_64() {
if is_x86_feature_detected!("avx2") {
assert!(
check_cpu_features().is_none(),
"check_cpu_features() must return None on an AVX2-capable x86_64 CPU"
);
}
// If the machine truly lacks AVX2, the check returning Some(_) is correct
// behaviour; we don't fail the test in that case.
}

/// On non-x86_64 architectures the check is compiled out entirely and must
/// always return None regardless of what features the CPU exposes.
#[test]
#[cfg(not(target_arch = "x86_64"))]
fn cpu_check_is_skipped_on_non_x86_64() {
assert!(
check_cpu_features().is_none(),
"check_cpu_features() must return None on non-x86_64 platforms"
);
}
}
10 changes: 9 additions & 1 deletion clients/cli/src/prover/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@ impl ProvingEngine {
}

/// Subprocess entrypoint: generate proof without verification
pub fn prove_fib_subprocess(inputs: &(u32, u32, u32)) -> Result<Proof, ProverError> {
pub fn prove_fib_subprocess(inputs: &(u32, u32, u32), num_threads: usize) -> Result<Proof, ProverError> {
if num_threads > 0 {
let _ = rayon::ThreadPoolBuilder::new()
.num_threads(num_threads)
.build_global();
}
let prover = Self::create_fib_prover()?;
let (view, proof) = prover
.prove_with_input::<(), (u32, u32, u32)>(&(), inputs)
Expand All @@ -53,13 +58,16 @@ impl ProvingEngine {
task: &Task,
environment: &Environment,
client_id: &str,
num_threads: usize,
) -> Result<Proof, ProverError> {
// Spawn a subprocess for proof generation to isolate memory usage
let exe_path = env::current_exe()?;
let mut cmd = tokio::process::Command::new(exe_path);
cmd.arg("prove-fib-subprocess")
.arg("--inputs")
.arg(serde_json::to_string(inputs)?)
.arg("--num-threads")
.arg(num_threads.to_string())
.stdout(Stdio::piped())
.stderr(Stdio::inherit());

Expand Down
1 change: 1 addition & 0 deletions clients/cli/src/prover/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ impl ProvingPipeline {
&task_ref,
&environment_ref,
&client_id_ref,
num_workers,
)
.await?;

Expand Down
83 changes: 81 additions & 2 deletions clients/cli/src/session/setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,9 @@ pub async fn setup_session(
let max_workers = ((total_cores as f64 * 0.75).ceil() as usize).max(1);
let mut num_workers: usize = max_threads.unwrap_or(1).clamp(1, max_workers as u32) as usize;

// Check memory and clamp threads if max-threads was explicitly set OR check-memory flag is set
if max_threads.is_some() || check_mem {
// Check memory and clamp threads only when --check-memory is explicitly requested.
// When the user explicitly sets --max-threads, trust their hardware knowledge.
if check_mem {
let memory_clamped_workers = clamp_threads_by_memory(num_workers);
if memory_clamped_workers < num_workers {
crate::print_cmd_warn!(
Expand Down Expand Up @@ -166,3 +167,81 @@ pub async fn setup_session(
num_workers,
})
}

#[cfg(test)]
mod tests {
use super::*;

fn compute_num_workers(max_threads: Option<u32>, total_cores: usize) -> usize {
let max_workers = ((total_cores as f64 * 0.75).ceil() as usize).max(1);
max_threads.unwrap_or(1).clamp(1, max_workers as u32) as usize
}

#[test]
fn num_workers_defaults_to_1_without_flag() {
let workers = compute_num_workers(None, 8);
assert_eq!(workers, 1);
}

#[test]
fn num_workers_uses_max_threads_when_set() {
// 8 cores → max_workers = ceil(6) = 6; --max-threads 4 should give 4
let workers = compute_num_workers(Some(4), 8);
assert_eq!(workers, 4);
}

#[test]
fn num_workers_clamps_to_core_limit() {
// 2 cores → max_workers = ceil(1.5) = 2; --max-threads 8 should be clamped to 2
let workers = compute_num_workers(Some(8), 2);
assert_eq!(workers, 2);
}

#[test]
fn num_workers_minimum_is_1() {
// --max-threads 0 would be clamped up to 1 by clamp(1, …)
let workers = compute_num_workers(Some(0), 8);
assert_eq!(workers, 1);
}

#[test]
fn memory_clamp_not_applied_when_check_mem_false() {
// Simulate a machine with very little "memory" — previously this would reduce workers to 1
// when max_threads.is_some(). Now it must NOT clamp unless check_mem=true.
let requested = 4usize;
// With check_mem=false the result stays at requested (no memory check runs).
// We verify by re-running the guard logic: clamp_threads_by_memory would return 1 on a
// tiny memory budget, but the guard is now gated on check_mem only.
let check_mem = false;
let max_threads: Option<u32> = Some(4);

// The guard in setup_session is: if check_mem { ... }
// Since check_mem=false, clamping never runs regardless of max_threads.
let result = if check_mem {
clamp_threads_by_memory(requested)
} else {
requested
};

assert_eq!(result, requested,
"num_workers should not be memory-clamped when --check-memory is not set (got {result}, max_threads={max_threads:?})");
}

#[test]
fn memory_clamp_applied_when_check_mem_true_and_overcommitted() {
// When check_mem=true and the request exceeds available memory, clamping should fire.
// clamp_threads_by_memory reads real system memory, so we only verify the path is taken
// and the result is ≥ 1.
let requested = usize::MAX; // absurdly large
let check_mem = true;

let result = if check_mem {
clamp_threads_by_memory(requested)
} else {
requested
};

assert!(result >= 1, "clamp_threads_by_memory must always return at least 1");
assert!(result < usize::MAX, "overcommitted request must be clamped");
}
}
Loading