From d0af36cce3994e2c6fde5b37f174d52c2db53e85 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 26 May 2023 13:16:29 +0800 Subject: [PATCH 1/4] replace collect with Vec::reserve the iter collect pollutes the call stack (10 levels of calls) --- src/routine.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/routine.rs b/src/routine.rs index b03b171d0..fa20c539d 100644 --- a/src/routine.rs +++ b/src/routine.rs @@ -242,15 +242,15 @@ where elapsed_time: Duration::from_millis(0), }; - iters - .iter() - .map(|iters| { - b.iters = *iters; - (*f)(&mut b, black_box(parameter)); - b.assert_iterated(); - m.to_f64(&b.value) - }) - .collect() + let mut results = Vec::with_capacity(iters.len()); + results.resize(iters.len(), 0.0); + for (i, iters) in iters.iter().enumerate() { + b.iters = *iters; + (*f)(&mut b, black_box(parameter)); + b.assert_iterated(); + results[i] = m.to_f64(&b.value); + } + results } fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64) { From cf60ffc4b975b54355af6985919abc09cb97104d Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 26 May 2023 13:25:44 +0800 Subject: [PATCH 2/4] offset tests with alloca --- Cargo.toml | 10 ++-------- src/routine.rs | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1b9306cec..fb2f9beda 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ tokio = { version = "1.0", default-features = false, features = [ "rt", ], optional = true } async-std = { version = "1.9", optional = true } +alloca = "0.3.3" [dependencies.plotters] version = "^0.3.1" @@ -61,14 +62,7 @@ futures = { version = "0.3", default_features = false, features = ["executor" maintenance = { status = "passively-maintained" } [features] -stable = [ - "csv_output", - "html_reports", - "async_futures", - "async_smol", - "async_tokio", - "async_std", -] +stable = ["csv_output", "html_reports", "async_futures", "async_smol", 
"async_tokio", "async_std"] default = ["rayon", "plotters", "cargo_bench_support"] # Enable use of the nightly-only test::black_box function to discourage compiler optimizations. diff --git a/src/routine.rs b/src/routine.rs index fa20c539d..e90e06fe4 100644 --- a/src/routine.rs +++ b/src/routine.rs @@ -245,10 +245,16 @@ where let mut results = Vec::with_capacity(iters.len()); results.resize(iters.len(), 0.0); for (i, iters) in iters.iter().enumerate() { - b.iters = *iters; - (*f)(&mut b, black_box(parameter)); - b.assert_iterated(); - results[i] = m.to_f64(&b.value); + let stack_alloc = i % 2048; + alloca::with_alloca( + stack_alloc, /* how much bytes we want to allocate */ + |_memory: &mut [core::mem::MaybeUninit<u8>] /* dynamically stack allocated slice itself */| { + b.iters = *iters; + (*f)(&mut b, black_box(parameter)); + b.assert_iterated(); + results[i] = m.to_f64(&b.value); + }, + ); } results } @@ -277,6 +283,8 @@ where } b.iters = b.iters.wrapping_mul(2); + b.iters = b.iters.min(64); // To make sure we offset the test at least with 0-64 bytes + // with alloca } } } From 17159c763cf57417ade45932ef98d5cb54cc0585 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Sat, 27 May 2023 00:10:08 +0800 Subject: [PATCH 3/4] use alloca only in windows and unix --- Cargo.toml | 2 +- src/routine.rs | 28 +++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fb2f9beda..b7431fcc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,7 @@ tokio = { version = "1.0", default-features = false, features = [ "rt", ], optional = true } async-std = { version = "1.9", optional = true } -alloca = "0.3.3" +alloca = "0.3.4" [dependencies.plotters] version = "^0.3.1" diff --git a/src/routine.rs b/src/routine.rs index e90e06fe4..9edf874a1 100644 --- a/src/routine.rs +++ b/src/routine.rs @@ -246,15 +246,25 @@ where results.resize(iters.len(), 0.0); for (i, iters) in iters.iter().enumerate() { let stack_alloc = i % 2048; - 
alloca::with_alloca( - stack_alloc, /* how much bytes we want to allocate */ - |_memory: &mut [core::mem::MaybeUninit<u8>] /* dynamically stack allocated slice itself */| { - b.iters = *iters; - (*f)(&mut b, black_box(parameter)); - b.assert_iterated(); - results[i] = m.to_f64(&b.value); - }, - ); + #[cfg(any(target_family = "unix", target_family = "windows"))] + { + alloca::with_alloca( + stack_alloc, /* how much bytes we want to allocate */ + |_memory: &mut [core::mem::MaybeUninit<u8>] /* dynamically stack allocated slice itself */| { + b.iters = *iters; + (*f)(&mut b, black_box(parameter)); + b.assert_iterated(); + results[i] = m.to_f64(&b.value); + }, + ); + } + #[cfg(not(any(target_family = "unix", target_family = "windows")))] + { + b.iters = *iters; + (*f)(&mut b, black_box(parameter)); + b.assert_iterated(); + results[i] = m.to_f64(&b.value); + } } results } From e6f98eefc5864d0906c51e1967116e7e04d8301e Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Mon, 29 May 2023 17:27:06 +0800 Subject: [PATCH 4/4] limit stackalloc to page size 4096 --- src/routine.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/routine.rs b/src/routine.rs index 9edf874a1..2cf2d53cb 100644 --- a/src/routine.rs +++ b/src/routine.rs @@ -245,7 +245,7 @@ where let mut results = Vec::with_capacity(iters.len()); results.resize(iters.len(), 0.0); for (i, iters) in iters.iter().enumerate() { - let stack_alloc = i % 2048; + let stack_alloc = i % 4096; // default page size #[cfg(any(target_family = "unix", target_family = "windows"))] { alloca::with_alloca(