diff --git a/Cargo.toml b/Cargo.toml
index 1b9306cec..b7431fcc8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,6 +43,7 @@ tokio = { version = "1.0", default-features = false, features = [
   "rt",
 ], optional = true }
 async-std = { version = "1.9", optional = true }
+alloca = "0.3.4"
 
 [dependencies.plotters]
 version          = "^0.3.1"
@@ -61,14 +62,7 @@ futures    = { version = "0.3", default_features = false, features = ["executor"
 maintenance = { status = "passively-maintained" }
 
 [features]
-stable = [
-  "csv_output",
-  "html_reports",
-  "async_futures",
-  "async_smol",
-  "async_tokio",
-  "async_std",
-]
+stable = ["csv_output", "html_reports", "async_futures", "async_smol", "async_tokio", "async_std"]
 default = ["rayon", "plotters", "cargo_bench_support"]
 
 # Enable use of the nightly-only test::black_box function to discourage compiler optimizations.
diff --git a/src/routine.rs b/src/routine.rs
index b03b171d0..2cf2d53cb 100644
--- a/src/routine.rs
+++ b/src/routine.rs
@@ -242,15 +242,31 @@ where
             elapsed_time: Duration::from_millis(0),
         };
 
-        iters
-            .iter()
-            .map(|iters| {
+        let mut results = Vec::with_capacity(iters.len());
+        results.resize(iters.len(), 0.0);
+        for (i, iters) in iters.iter().enumerate() {
+            let stack_alloc = i % 4096; // default page size
+            #[cfg(any(target_family = "unix", target_family = "windows"))]
+            {
+                alloca::with_alloca(
+                    stack_alloc, /* how many bytes we want to allocate */
+                    |_memory: &mut [core::mem::MaybeUninit<u8>] /* dynamically stack allocated slice itself */| {
+                        b.iters = *iters;
+                        (*f)(&mut b, black_box(parameter));
+                        b.assert_iterated();
+                        results[i] = m.to_f64(&b.value);
+                    },
+                );
+            }
+            #[cfg(not(any(target_family = "unix", target_family = "windows")))]
+            {
                 b.iters = *iters;
                 (*f)(&mut b, black_box(parameter));
                 b.assert_iterated();
-                m.to_f64(&b.value)
-            })
-            .collect()
+                results[i] = m.to_f64(&b.value);
+            }
+        }
+        results
     }
 
     fn warm_up(&mut self, m: &M, how_long: Duration, parameter: &T) -> (u64, u64) {
@@ -277,6 +293,8 @@ where
             }
 
             b.iters = b.iters.wrapping_mul(2);
+            b.iters = b.iters.min(64); // To make sure we offset the test at least with 0-64 bytes
+                                       // with alloca
         }
     }
 }