scroll-tech · spherel · May 2, 2025 · Mar 24, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -36,6 +36,7 @@ cfg-if = "1.0"
 criterion = { version = "0.5", features = ["html_reports"] }
 crossbeam-channel = "0.5"
 itertools = "0.13"
+ndarray = "0.16"
 num-bigint = { version = "0.4.6" }
 num-derive = "0.4"
 num-traits = "0.2"

diff --git a/gkr_iop/Cargo.toml b/gkr_iop/Cargo.toml
@@ -14,10 +14,23 @@ ark-std.workspace = true
 ff_ext = { path = "../ff_ext" }
 itertools.workspace = true
 multilinear_extensions = { version = "0.1.0", path = "../multilinear_extensions" }
+ndarray.workspace = true
 p3-field.workspace = true
 p3-goldilocks.workspace = true
 rand.workspace = true
 rayon.workspace = true
 subprotocols = { path = "../subprotocols" }
 thiserror = "1"
+tiny-keccak.workspace = true
 transcript = { path = "../transcript" }
+
+[dev-dependencies]
+criterion.workspace = true
+
+[[bench]]
+harness = false
+name = "keccak_f"
+
+[[bench]]
+harness = false
+name = "faster_keccak"
diff --git a/gkr_iop/benches/faster_keccak.rs b/gkr_iop/benches/faster_keccak.rs
@@ -0,0 +1,38 @@
+use std::time::Duration;
+
+use criterion::*;
+use gkr_iop::precompiles::run_faster_keccakf;
+
+use rand::{Rng, SeedableRng};
+criterion_group!(benches, keccak_f_fn);
+criterion_main!(benches);
+
+const NUM_SAMPLES: usize = 10;
+
+/// Benchmarks `run_faster_keccakf` on a batch of two seeded pseudo-random states.
+fn keccak_f_fn(c: &mut Criterion) {
+    // The group/ID must differ from `benches/keccak_f.rs`: Criterion stores and compares
+    // results by these names, so sharing "keccak_f" would mix the two targets' baselines.
+    // TODO: expand to more input sizes once runtime is acceptable.
+    let mut group = c.benchmark_group("faster_keccak_f");
+    group.sample_size(NUM_SAMPLES);
+
+    // Benchmark the proving time; only the `run_faster_keccakf` call itself is timed.
+    group.bench_function(BenchmarkId::new("faster_keccak_f", "two_states"), |b| {
+        b.iter_custom(|iters| {
+            let mut time = Duration::ZERO;
+            for _ in 0..iters {
+                // Re-seed every iteration so each run sees identical inputs.
+                let mut rng = rand::rngs::StdRng::seed_from_u64(42);
+                let state1: [u64; 25] = std::array::from_fn(|_| rng.gen());
+                let state2: [u64; 25] = std::array::from_fn(|_| rng.gen());
+
+                let instant = std::time::Instant::now();
+                let _ = black_box(run_faster_keccakf(vec![state1, state2], false, false));
+                time += instant.elapsed();
+            }
+            time
+        });
+    });
+    group.finish();
+}
diff --git a/gkr_iop/benches/keccak_f.rs b/gkr_iop/benches/keccak_f.rs
@@ -0,0 +1,38 @@
+use std::time::Duration;
+
+use criterion::*;
+use gkr_iop::precompiles::{run_faster_keccakf, run_keccakf};
+use p3_field::extension::BinomialExtensionField;
+use p3_goldilocks::Goldilocks;
+use rand::{Rng, SeedableRng};
+criterion_group!(benches, keccak_f_fn);
+criterion_main!(benches);
+
+const NUM_SAMPLES: usize = 10;
+
+/// Benchmarks `run_keccakf` on a single seeded pseudo-random `[u64; 25]` state.
+fn keccak_f_fn(c: &mut Criterion) {
+    // TODO: expand to more input sizes once runtime is acceptable.
+    let mut group = c.benchmark_group("keccak_f");
+    group.sample_size(NUM_SAMPLES);
+
+    // Benchmark the proving time; only the `run_keccakf` call itself is timed.
+    group.bench_function(BenchmarkId::new("keccak_f", "keccak_f"), |b| {
+        b.iter_custom(|iters| {
+            let mut time = Duration::ZERO;
+            for _ in 0..iters {
+                // Re-seed every iteration so each run sees the same input state.
+                let mut rng = rand::rngs::StdRng::seed_from_u64(42);
+                let state: [u64; 25] = std::array::from_fn(|_| rng.gen());
+
+                let instant = std::time::Instant::now();
+                let _ = black_box(run_keccakf(state, false, false));
+                time += instant.elapsed();
+            }
+
+            time
+        });
+    });
+
+    group.finish();
+}
diff --git a/gkr_iop/examples/multi_layer_logup.rs b/gkr_iop/examples/multi_layer_logup.rs
@@ -2,18 +2,18 @@ use std::{marker::PhantomData, mem, sync::Arc};
 
 use ff_ext::ExtensionField;
 use gkr_iop::{
-    ProtocolBuilder, ProtocolWitnessGenerator,
     chip::Chip,
     evaluation::{EvalExpression, PointAndEval},
     gkr::{
-        GKRCircuitWitness, GKRProverOutput,
         layer::{Layer, LayerType, LayerWitness},
+        GKRCircuitWitness, GKRProverOutput,
     },
+    ProtocolBuilder, ProtocolWitnessGenerator,
 };
-use itertools::{Itertools, izip};
-use p3_field::{PrimeCharacteristicRing, extension::BinomialExtensionField};
+use itertools::{izip, Itertools};
+use p3_field::{extension::BinomialExtensionField, PrimeCharacteristicRing};
 use p3_goldilocks::Goldilocks;
-use rand::{Rng, rngs::OsRng};
+use rand::{rngs::OsRng, Rng};
 use subprotocols::expression::{Constant, Expression};
 use transcript::{BasicTranscript, Transcript};
 
@@ -64,14 +64,15 @@ impl<E: ExtensionField> ProtocolBuilder for TowerChipLayout<E> {
         let height = self.params.height;
         let lookup_challenge = Expression::Const(self.lookup_challenge.clone());
 
-        self.output_cumulative_sum = chip.allocate_output_evals();
+        self.output_cumulative_sum = chip.allocate_output_evals::<2>().try_into().unwrap();
 
         // Tower layers
         let ([updated_table, count], challenges) = (0..height).fold(
             (self.output_cumulative_sum.clone(), vec![]),
             |([den, num], challenges), i| {
                 let [den_0, den_1, num_0, num_1] = if i == height - 1 {
-                    // Allocate witnesses in the extension field, except numerator inputs in the base field.
+                    // Allocate witnesses in the extension field, except numerator inputs in the
+                    // base field.
                     let ([num_0, num_1], [den_0, den_1]) = chip.allocate_wits_in_layer();
                     [den_0, den_1, num_0, num_1]
                 } else {
@@ -86,17 +87,20 @@ impl<E: ExtensionField> ProtocolBuilder for TowerChipLayout<E> {
                     num_1.0.into(),
                 ];
                 let (in_bases, in_exts) = if i == height - 1 {
-                    (vec![num_0.1.clone(), num_1.1.clone()], vec![
-                        den_0.1.clone(),
-                        den_1.1.clone(),
-                    ])
+                    (
+                        vec![num_0.1.clone(), num_1.1.clone()],
+                        vec![den_0.1.clone(), den_1.1.clone()],
+                    )
                 } else {
-                    (vec![], vec![
-                        den_0.1.clone(),
-                        den_1.1.clone(),
-                        num_0.1.clone(),
-                        num_1.1.clone(),
-                    ])
+                    (
+                        vec![],
+                        vec![
+                            den_0.1.clone(),
+                            den_1.1.clone(),
+                            num_0.1.clone(),
+                            num_1.1.clone(),
+                        ],
+                    )
                 };
                 chip.add_layer(Layer::new(
                     format!("Tower_layer_{}", i),
@@ -109,6 +113,7 @@ impl<E: ExtensionField> ProtocolBuilder for TowerChipLayout<E> {
                     in_bases,
                     in_exts,
                     vec![den, num],
+                    vec![],
                 ));
                 let [challenge] = chip.allocate_challenges();
                 (
@@ -138,6 +143,7 @@ impl<E: ExtensionField> ProtocolBuilder for TowerChipLayout<E> {
             vec![table.1.clone()],
             vec![],
             vec![updated_table],
+            vec![],
         ));
 
         chip.allocate_base_opening(self.committed_table_id, table.1);

diff --git a/gkr_iop/src/chip/builder.rs b/gkr_iop/src/chip/builder.rs
@@ -1,5 +1,6 @@
 use std::array;
 
+use itertools::Itertools;
 use subprotocols::expression::{Constant, Witness};
 
 use crate::{
@@ -22,10 +23,11 @@ impl Chip {
         array::from_fn(|i| i + self.n_committed_exts - N)
     }
 
-    /// Allocate `Witness` and `EvalExpression` for the input polynomials in a layer.
-    /// Where `Witness` denotes the index and `EvalExpression` denotes the position
-    /// to place the evaluation of the polynomial after processing the layer prover
-    /// for each polynomial. This should be called at most once for each layer!
+    /// Allocate `Witness` and `EvalExpression` for the input polynomials in a
+    /// layer. Where `Witness` denotes the index and `EvalExpression`
+    /// denotes the position to place the evaluation of the polynomial after
+    /// processing the layer prover for each polynomial. This should be
+    /// called at most once for each layer!
     #[allow(clippy::type_complexity)]
     pub fn allocate_wits_in_layer<const M: usize, const N: usize>(
         &mut self,
@@ -51,9 +53,16 @@ impl Chip {
     }
 
     /// Generate the evaluation expression for each output.
-    pub fn allocate_output_evals<const N: usize>(&mut self) -> [EvalExpression; N] {
+    ///
+    /// Bumps `n_evaluations` by `N` and returns `EvalExpression::Single(idx)` for each
+    /// of the `N` newly reserved indices, in increasing order.
+    pub fn allocate_output_evals<const N: usize>(&mut self) -> Vec<EvalExpression> {
         self.n_evaluations += N;
-        array::from_fn(|i| EvalExpression::Single(i + self.n_evaluations - N))
+        // TODO: hotfix to avoid stack overflow: collect into a `Vec` instead of
+        // building `[EvalExpression; N]` with `array::from_fn`; fix later.
+        // `first` is the lowest of the `N` indices reserved by the increment above.
+        let first = self.n_evaluations - N;
+        (first..first + N).map(EvalExpression::Single).collect_vec()
     }
 
     /// Allocate challenges.
@@ -62,14 +71,16 @@ impl Chip {
         array::from_fn(|i| Constant::Challenge(i + self.n_challenges - N))
     }
 
-    /// Allocate a PCS opening action to a base polynomial with index `wit_index`.
-    /// The `EvalExpression` represents the expression to compute the evaluation.
+    /// Allocate a PCS opening action for the base polynomial with index
+    /// `wit_index`. `eval` is the expression used to compute the evaluation;
+    /// the `(wit_index, eval)` pair is appended to `self.base_openings`.
     pub fn allocate_base_opening(&mut self, wit_index: usize, eval: EvalExpression) {
         self.base_openings.push((wit_index, eval));
     }
 
-    /// Allocate a PCS opening action to an ext polynomial with index `wit_index`.
-    /// The `EvalExpression` represents the expression to compute the evaluation.
+    /// Allocate a PCS opening action for the ext polynomial with index
+    /// `wit_index`. `eval` is the expression used to compute the evaluation;
+    /// the `(wit_index, eval)` pair is appended to `self.ext_openings`.
     pub fn allocate_ext_opening(&mut self, wit_index: usize, eval: EvalExpression) {
         self.ext_openings.push((wit_index, eval));
     }