From f33113c1f0f6ffaa4b2e0d634881305cfb70e9fe Mon Sep 17 00:00:00 2001
From: spherel <sph6r6.l1u@gmail.com>
Date: Mon, 4 Jul 2022 21:05:11 +0000
Subject: [PATCH 1/9] Add cost model for prover time and memory usage.

---
 .gitignore                                    |   2 +
 .../examples/simple-example-cost-model.rs     | 241 ++++++++++++
 halo2_proofs/src/dev.rs                       |   3 +
 halo2_proofs/src/dev/cost_model.rs            | 346 ++++++++++++++++++
 halo2_proofs/src/plonk.rs                     |   5 +
 halo2_proofs/src/poly/domain.rs               |  27 ++
 6 files changed, 624 insertions(+)
 create mode 100644 halo2_proofs/examples/simple-example-cost-model.rs
 create mode 100644 halo2_proofs/src/dev/cost_model.rs
diff --git a/.gitignore b/.gitignore
index 1bd93e286e..75d96298ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@
 Cargo.lock
 .vscode
 **/*.html
+output/
+run-cost-model.sh
diff --git a/halo2_proofs/examples/simple-example-cost-model.rs b/halo2_proofs/examples/simple-example-cost-model.rs
new file mode 100644
index 0000000000..14fae89e05
--- /dev/null
+++ b/halo2_proofs/examples/simple-example-cost-model.rs
@@ -0,0 +1,241 @@
+use halo2_proofs::{
+    arithmetic::FieldExt,
+    circuit::{Cell, Layouter, SimpleFloorPlanner},
+    plonk::*,
+    poly::{commitment::Params, commitment::ParamsVerifier, Rotation},
+    transcript::{Blake2bRead, Blake2bWrite, Challenge255}, cost_model_main,
+};
+use pairing::bn256::{Bn256, Fr as Fp, G1Affine};
+use rand_core::OsRng;
+
+use std::marker::PhantomData;
+
+#[derive(Clone)]
+struct PlonkConfig {
+    a: Column<Advice>,
+    b: Column<Advice>,
+    c: Column<Advice>,
+
+    sa: Column<Fixed>,
+    sb: Column<Fixed>,
+    sc: Column<Fixed>,
+    sm: Column<Fixed>,
+}
+
+trait StandardCs<FF: FieldExt> {
+    fn raw_multiply<F>(
+        &self,
+        layouter: &mut impl Layouter<FF>,
+        f: F,
+    ) -> Result<(Cell, Cell, Cell), Error>
+    where
+        F: FnMut() -> Result<(FF, FF, FF), Error>;
+    fn raw_add<F>(
+        &self,
+        layouter: &mut impl Layouter<FF>,
+        f: F,
+    ) -> Result<(Cell, Cell, Cell), Error>
+    where
+        F: FnMut() -> Result<(FF, FF, FF), Error>;
+    fn copy(&self, layouter: &mut impl Layouter<FF>, a: Cell, b: Cell) -> Result<(), Error>;
+}
+
+#[derive(Clone)]
+struct MyCircuit<F: FieldExt> {
+    a: Option<F>,
+    k: u32,
+}
+
+struct StandardPlonk<F: FieldExt> {
+    config: PlonkConfig,
+    _marker: PhantomData<F>,
+}
+
+impl<FF: FieldExt> StandardPlonk<FF> {
+    fn new(config: PlonkConfig) -> Self {
+        StandardPlonk {
+            config,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<FF: FieldExt> StandardCs<FF> for StandardPlonk<FF> {
+    fn raw_multiply<F>(
+        &self,
+        layouter: &mut impl Layouter<FF>,
+        mut f: F,
+    ) -> Result<(Cell, Cell, Cell), Error>
+    where
+        F: FnMut() -> Result<(FF, FF, FF), Error>,
+    {
+        layouter.assign_region(
+            || "mul",
+            |mut region| {
+                let mut values = None;
+                let lhs = region.assign_advice(
+                    || "lhs",
+                    self.config.a,
+                    0,
+                    || {
+                        values = Some(f()?);
+                        Ok(values.ok_or(Error::Synthesis)?.0)
+                    },
+                )?;
+                let rhs = region.assign_advice(
+                    || "rhs",
+                    self.config.b,
+                    0,
+                    || Ok(values.ok_or(Error::Synthesis)?.1),
+                )?;
+
+                let out = region.assign_advice(
+                    || "out",
+                    self.config.c,
+                    0,
+                    || Ok(values.ok_or(Error::Synthesis)?.2),
+                )?;
+
+                region.assign_fixed(|| "a", self.config.sa, 0, || Ok(FF::zero()))?;
+                region.assign_fixed(|| "b", self.config.sb, 0, || Ok(FF::zero()))?;
+                region.assign_fixed(|| "c", self.config.sc, 0, || Ok(FF::one()))?;
+                region.assign_fixed(|| "a * b", self.config.sm, 0, || Ok(FF::one()))?;
+
+                Ok((lhs.cell(), rhs.cell(), out.cell()))
+            },
+        )
+    }
+
+    fn raw_add<F>(
+        &self,
+        layouter: &mut impl Layouter<FF>,
+        mut f: F,
+    ) -> Result<(Cell, Cell, Cell), Error>
+    where
+        F: FnMut() -> Result<(FF, FF, FF), Error>,
+    {
+        layouter.assign_region(
+            || "add",
+            |mut region| {
+                let mut values = None;
+                let lhs = region.assign_advice(
+                    || "lhs",
+                    self.config.a,
+                    0,
+                    || {
+                        values = Some(f()?);
+                        Ok(values.ok_or(Error::Synthesis)?.0)
+                    },
+                )?;
+                let rhs = region.assign_advice(
+                    || "rhs",
+                    self.config.b,
+                    0,
+                    || Ok(values.ok_or(Error::Synthesis)?.1),
+                )?;
+
+                let out = region.assign_advice(
+                    || "out",
+                    self.config.c,
+                    0,
+                    || Ok(values.ok_or(Error::Synthesis)?.2),
+                )?;
+
+                region.assign_fixed(|| "a", self.config.sa, 0, || Ok(FF::one()))?;
+                region.assign_fixed(|| "b", self.config.sb, 0, || Ok(FF::one()))?;
+                region.assign_fixed(|| "c", self.config.sc, 0, || Ok(FF::one()))?;
+                region.assign_fixed(|| "a * b", self.config.sm, 0, || Ok(FF::zero()))?; // sa = sb = sc = 1, sm = 0: the gate reduces to a + b = c
+
+                Ok((lhs.cell(), rhs.cell(), out.cell()))
+            },
+        )
+    }
+
+    fn copy(&self, layouter: &mut impl Layouter<FF>, left: Cell, right: Cell) -> Result<(), Error> {
+        layouter.assign_region(
+            || "copy",
+            |mut region| {
+                region.constrain_equal(left, right)?;
+                region.constrain_equal(left, right)
+            },
+        )
+    }
+}
+
+impl<F: FieldExt> Circuit<F> for MyCircuit<F> {
+    type Config = PlonkConfig;
+    type FloorPlanner = SimpleFloorPlanner;
+
+    fn without_witnesses(&self) -> Self {
+        Self { a: None, k: self.k }
+    }
+
+    fn configure(meta: &mut ConstraintSystem<F>) -> PlonkConfig {
+        let a = meta.advice_column();
+        let b = meta.advice_column();
+        let c = meta.advice_column();
+
+        meta.enable_equality(a);
+        meta.enable_equality(b);
+        meta.enable_equality(c);
+
+        let sm = meta.fixed_column();
+        let sa = meta.fixed_column();
+        let sb = meta.fixed_column();
+        let sc = meta.fixed_column();
+
+        meta.create_gate("mini plonk", |meta| {
+            let a = meta.query_advice(a, Rotation::cur());
+            let b = meta.query_advice(b, Rotation::cur());
+            let c = meta.query_advice(c, Rotation::cur());
+
+            let sa = meta.query_fixed(sa, Rotation::cur());
+            let sb = meta.query_fixed(sb, Rotation::cur());
+            let sc = meta.query_fixed(sc, Rotation::cur());
+            let sm = meta.query_fixed(sm, Rotation::cur());
+
+            vec![a.clone() * sa + b.clone() * sb + a * b * sm + (c * sc * (-F::one()))]
+        });
+
+        PlonkConfig {
+            a,
+            b,
+            c,
+            sa,
+            sb,
+            sc,
+            sm,
+            // perm,
+        }
+    }
+
+    fn synthesize(&self, config: PlonkConfig, mut layouter: impl Layouter<F>) -> Result<(), Error> {
+        let cs = StandardPlonk::new(config);
+
+        for _ in 0..1 << ((self.k - 1) - 3) {
+            let mut a_squared = None;
+            let (a0, _, c0) = cs.raw_multiply(&mut layouter, || {
+                a_squared = self.a.map(|a| a.square());
+                Ok((
+                    self.a.ok_or(Error::Synthesis)?,
+                    self.a.ok_or(Error::Synthesis)?,
+                    a_squared.ok_or(Error::Synthesis)?,
+                ))
+            })?;
+            let (a1, b1, _) = cs.raw_add(&mut layouter, || {
+                let fin = a_squared.and_then(|a2| self.a.map(|a| a + a2));
+                Ok((
+                    self.a.ok_or(Error::Synthesis)?,
+                    a_squared.ok_or(Error::Synthesis)?,
+                    fin.ok_or(Error::Synthesis)?,
+                ))
+            })?;
+            cs.copy(&mut layouter, a0, a1)?;
+            cs.copy(&mut layouter, b1, c0)?;
+        }
+
+        Ok(())
+    }
+}
+
+cost_model_main!(MyCircuit::<Fp>{a: Some(Fp::from(5)), k: 8});
diff --git a/halo2_proofs/src/dev.rs b/halo2_proofs/src/dev.rs
index ce8327974a..650d768094 100644
--- a/halo2_proofs/src/dev.rs
+++ b/halo2_proofs/src/dev.rs
@@ -24,6 +24,9 @@ mod util;
 pub mod cost;
 pub use cost::CircuitCost;
 
+pub mod cost_model;
+pub use cost_model::*;
+
 mod gates;
 pub use gates::CircuitGates;
 
diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs
new file mode 100644
index 0000000000..e9d2cb0399
--- /dev/null
+++ b/halo2_proofs/src/dev/cost_model.rs
@@ -0,0 +1,346 @@
+//! Circuit cost model.
+//! usage: ${binary_file} estimate ${k}
+use std::{
+    time::Instant, io, fs, collections::BTreeMap,
+};
+
+use crate::{
+    arithmetic::{Field, CurveAffine, Engine},
+    circuit::{Cell, Layouter, SimpleFloorPlanner},
+    plonk::*,
+    poly::{commitment::Params, commitment::ParamsVerifier, EvaluationDomain, Rotation},
+    transcript::{Blake2bRead, Blake2bWrite, Challenge255},
+};
+use group::{prime::PrimeCurveAffine, GroupEncoding};
+use pairing::bn256::{Bn256, Fr as Fp, G1Affine};
+use rand_core::OsRng;
+
+fn measure_elapsed_time<T, F: FnOnce() -> T>(f: F) -> (f64, T) {
+    let start = Instant::now();
+    let res = f();
+    (start.elapsed().as_secs_f64(), res)
+}
+
+/// EstimateResult is to store the output of estimate()
+#[derive(Debug)]
+pub struct EstimateResult {
+    prover_time: f64,
+    mem_usage: f64,
+    aggregate_circuit_size: usize,
+}
+
+impl EstimateResult {
+    /// print estimation result.
+    pub fn print(&self) {
+        println!("prover time = {} (s)", self.prover_time);
+        println!("memory usage = {} (KB)", self.mem_usage);
+        // println!("aggregate circuit size = {}", aggregate_circuit_size);
+    }
+}
+
+/// estimate is to estimate the prover time, peek memory usage and aggregate circuit size.
+pub fn estimate<
+    E: Engine,
+    ConcreteCircuit: Circuit<E::Scalar>,
+>(
+    circuit: ConcreteCircuit, 
+    res_1: SimLinearResult,
+    res_2: SimLinearResult,
+    k: usize,
+) -> EstimateResult {
+    // NOTE(sphere): init params
+    // Initialize the polynomial commitment parameters
+    let mut cs = ConstraintSystem::default();
+    let config = ConcreteCircuit::configure(&mut cs);
+
+    let generate_fake_params = |k| {
+        let s = E::Scalar::random(OsRng);
+        let rand_c1 = <E::G1Affine as PrimeCurveAffine>::generator() * s;
+        let rand_c2 = <E::G2Affine as PrimeCurveAffine>::generator() * s;
+        let rand_c1: E::G1Affine = rand_c1.into();
+        let n = 1 << k;
+        Params {
+            k: k as u32,
+            n: n as u64,
+            g: (0..n).map(|_| rand_c1.clone()).collect(),
+            g_lagrange: (0..n).map(|_| rand_c1.clone()).collect(),
+            additional_data: Vec::from(rand_c2.to_bytes().as_ref()),
+        }
+    };
+
+    let estimate_pt_non_linear = |k| {
+
+        let params = generate_fake_params(k);
+        
+        // Initialize the domain
+        let domain = EvaluationDomain::fake_new(cs.degree() as u32, params.k, E::Scalar::random(OsRng));
+        
+        // NOTE(sphere): count function call
+        let FuncCount { num_fft, num_extended_fft, num_msm, num_btree } = dummy_proof::<E::G1Affine, ConcreteCircuit>(
+            &params,
+            &cs,
+            &domain,
+        );
+        
+        let n = 1 << k as usize;
+        let rand_vec: Vec::<E::Scalar> = (0..n).map(|_| E::Scalar::random(&mut OsRng)).collect();
+        let rand_vec2 = rand_vec.clone();
+        let rand_values = domain.lagrange_from_vec(rand_vec);
+
+        // NOTE(sphere): estimate opr time
+        //      msm
+        let (time_msm, _) = measure_elapsed_time(|| params.commit_lagrange(&rand_values));
+        //      fft
+        let (time_fft, rand_poly) = measure_elapsed_time(|| domain.lagrange_to_coeff(rand_values));
+        //      extended fft
+        let (time_extended_fft, _) = measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
+        //      BTree time cost in lookup argument
+        let (time_btree, _) = measure_elapsed_time(|| {
+            let mut leftover_table_map: BTreeMap<E::Scalar, u32> = rand_vec2
+            .iter().take(n)
+            .fold(BTreeMap::new(), |mut acc, coeff| {
+                *acc.entry(*coeff).or_insert(0) += 1;
+                acc
+            });
+            for item in rand_vec2 {
+                if let Some(count) = leftover_table_map.get_mut(&item) {
+                    *count -= 1;
+                }
+            }
+        });
+        println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
+        println!("num_extended_fft = {}, time_extended_fft = {}", num_extended_fft, time_extended_fft);
+        println!("num_msm = {}, time_msm = {}", num_msm, time_msm);
+        println!("num_btree = {}, time_btree = {}", num_fft, time_fft);
+        
+        let pt_non_linear = (num_fft as f64) * time_fft +
+                             (num_extended_fft as f64) * time_extended_fft +
+                             (num_msm as f64) * time_msm +
+                             (num_btree as f64) * time_btree;
+        println!("pt_non_linear = {}", pt_non_linear);
+        println!("");
+        pt_non_linear
+    };
+
+    let prover_time = estimate_pt_non_linear(k);
+
+    let calc_linear_term = |x_1: f64, y_1: f64, x_2: f64, y_2: f64, x_3 :f64| {
+        y_1 + (y_2 - y_1) / (x_2 - x_1) * (x_3 - x_1)
+    };
+
+    let mem_usage = calc_linear_term(
+        (1 << res_1.k) as f64, res_1.mem_usage,
+        (1 << res_2.k) as f64, res_2.mem_usage,
+        (1 << k) as f64,
+    );
+
+    // NOTE(sphere): calculate aggregate_circuit_size
+    
+    EstimateResult {
+        prover_time,
+        mem_usage,
+        aggregate_circuit_size: 0,
+    }
+}
+
+/// SimLinearResult is to store the result of simulate.
+#[derive(Debug)]
+pub struct SimLinearResult {
+    k: usize,
+    // prover_time: f64,
+    mem_usage: f64,
+}
+
+impl SimLinearResult {
+    /// read is to read SimLinearResult from a file.
+    pub fn read(filepath: String) -> SimLinearResult {
+        let data = fs::read_to_string(filepath).expect("read failed");
+        let mut data = data.split_whitespace();
+        let k = data.next().unwrap().parse().expect("k parse failed");
+        // let prover_time = data.next().unwrap().parse().expect("prover_time (s) parse failed");
+        let mem_usage = data.next().unwrap().parse().expect("mem_usage (KB) parse in failed");
+        SimLinearResult {
+            k,
+            // prover_time,
+            mem_usage,
+        }
+    }
+
+    /// create new `SimLinearResult`.
+    pub fn new(k: usize, mem_usage: f64) -> Self {
+        SimLinearResult {
+            k,
+            mem_usage,
+        }
+    }
+}
+
+/// simulate_circuit is to run a circuit proving process.
+pub fn simulate_circuit<
+    E: Engine,
+    ConcreteCircuit: Circuit<E::Scalar>,
+>(circuit: ConcreteCircuit, k: usize) {
+    // let public_inputs_size = 0;
+
+    // Initialize the polynomial commitment parameters
+    let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(k as u32);
+    // let params_verifier: ParamsVerifier<E> = params.verifier(public_inputs_size).unwrap();
+
+    // Initialize the proving key
+    let vk = keygen_vk(&params, &circuit).expect("keygen_vk should not fail");
+    let pk = keygen_pk(&params, vk, &circuit).expect("keygen_pk should not fail");
+
+    // Create a proof
+    let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
+
+    let (prover_time, _) = measure_elapsed_time(|| 
+        create_proof(&params, &pk, &[circuit], &[&[]], OsRng, &mut transcript)
+        .expect("proof generation should not fail")
+    );
+
+    // NOTE(liutainyi): output prover_time
+    println!("{}\n{}", k, prover_time);
+
+    let proof = transcript.finalize();
+
+    // let strategy = SingleVerifier::new(&params_verifier);
+    // let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
+
+    // verify_proof(
+    //     &params_verifier,
+    //     pk.get_vk(),
+    //     strategy,
+    //     &[&[]],
+    //     &mut transcript,
+    // )
+    // .unwrap();
+}
+
+struct FuncCount {
+    num_fft: usize, 
+    num_extended_fft: usize,
+    num_msm: usize,
+    num_btree: usize,
+}
+
+fn dummy_proof<C: CurveAffine, ConcreteCircuit: Circuit<C::Scalar>>(
+    params: &Params<C>,
+    cs: &ConstraintSystem<C::Scalar>,
+    domain: &EvaluationDomain<C::Scalar>,
+) -> FuncCount {
+    let l = 1;
+    let mut num_fft = 0 as usize;
+    let mut num_extended_fft = 0 as usize;
+    let mut num_msm = 0 as usize;
+    let mut num_btree = 0 as usize;
+
+    // (instance, advice) calculate (poly, coset, commitment)
+
+    // NOTE(sphere): ins_commit, pt += l * n_ins * commit_lagrange_t
+    num_msm += l * cs.num_instance_columns;
+    // NOTE(sphere): ins_poly, pt += l * n_ins + lagrange_to_coeff_t
+    num_fft += l * cs.num_instance_columns;
+    // NOTE(sphere): ins_coset, pt += l * n_ins + coeff_to_extended_t
+    num_extended_fft += l * cs.num_instance_columns;
+    // NOTE(sphere): adv_commit, pt += l * n_adv * commit_lagrange_t
+    num_msm += l * cs.num_advice_columns;
+    // NOTE(sphere): adv_poly, pt += l * n_adv * lagrange_to_coeff_t
+    num_fft += l * cs.num_advice_columns;
+    // NOTE(sphere): adv_coset, pt += l * n_adv * coeff_to_extended_t
+    num_extended_fft += l * cs.num_advice_columns;
+
+    // NOTE(sphere): pt += l * n_lookup * commit_permuted
+    //      NOTE(sphere): BTree cost for A' and S'.
+    let num_lookups = cs.lookups.len();
+    num_btree += l * num_lookups;
+
+    // Commit to permutations.
+    // NOTE(sphere): l * perm_commit_t
+    //      commit_lagrange: z
+    let num_perm_slices = (cs.permutation.get_columns().len() + (cs.degree() - 1)) / (cs.degree() - 2);
+    num_msm += num_perm_slices;
+    //      lagrange_to_coeff: z
+    num_fft += num_perm_slices;
+    //      coeff_to_extended: z
+    num_extended_fft += num_perm_slices;
+    
+    // NOTE(sphere): pt += lookup_commit_product
+    //      commit_lagrange: z
+    num_msm += num_lookups;
+    //      lagrange_to_coeff: z
+    num_fft += num_lookups;
+
+    // Commit to the vanishing argument's random polynomial for blinding h(x_3)
+    // NOTE(sphere): vanishing_commit
+    //      commit: random_poly
+    num_msm += 1;
+
+    // Evaluate the h(X) polynomial
+    // NOTE(sphere): evaluate_h 3 coeff_to_extended for each lookup argument
+    num_extended_fft += 3 * num_lookups;
+
+    // Construct the vanishing argument's h(X) commitments
+    // NOTE(sphere): pt += vanishing_construct
+    //      extended_to_coeff: h_poly
+    num_extended_fft +=  1;
+    //      commit: h_poly_i
+    let num_h_pieces = ((domain.extended_len() as u64 + params.n - 1) / params.n) as usize;
+    num_msm += num_h_pieces;
+
+    // NOTE(sphere): evaluating ins / adv / fix only contains linear_terms.
+
+    // NOTE(sphere): vanishing_evaluate only contains linear_terms.
+
+    // NOTE(sphere): permutation_evaluate only contains linear_terms.
+
+    // NOTE(sphere): permutation_construct_evaluate only contains linear_terms.
+
+    // NOTE(sphere): lookups_evaluate only contains linear_terms.
+
+    // NOTE(sphere): sum up number of queries.
+
+    // NOTE(sphere): multiopen(shplonk).
+    //      commit: h_x, h_x
+    num_msm += 2;
+    FuncCount {
+        num_fft, 
+        num_extended_fft,
+        num_msm,
+        num_btree,
+    }
+}
+
+
+/// cost_model_main is to generate a main function to run the cost model for a circuit.
+#[macro_export]
+macro_rules! cost_model_main {
+    ($cir:expr) => {
+        use halo2_proofs::dev::{
+            simulate_circuit,
+            SimLinearResult,
+            estimate,
+        };
+
+        fn main() {
+            // NOTE(sphere): get k from args
+            let mode = std::env::args().nth(1).expect("no running-mode given");
+            let k = std::env::args().nth(2).expect("no circuit size given").parse().unwrap();
+            // NOTE(sphere): estimate linear cost (cfg == simulate)
+            let circuit = $cir;
+            if mode.eq(&String::from("simulate")) {
+                simulate_circuit::<Bn256, _>(circuit, k);
+            } else if mode.eq(&String::from("estimate")) {
+                // let res_path_1 = std::env::args().nth(3).expect("no circuit size given").parse().unwrap();
+                // let res_path_2 = std::env::args().nth(4).expect("no circuit size given").parse().unwrap();
+                // let res_1 = SimLinearResult::read(res_path_1);
+                // let res_2 = SimLinearResult::read(res_path_2);
+                let res_1 = SimLinearResult::new(10, 6292.0);
+                let res_2 = SimLinearResult::new(14, 50092.0);
+                let res = estimate::<Bn256, _>(circuit, res_1, res_2, k);
+                res.print();
+            } else {
+                panic!("unrecognized format");
+            }
+        }
+    }
+}
diff --git a/halo2_proofs/src/plonk.rs b/halo2_proofs/src/plonk.rs
index 1f7cb9301c..971dc10d09 100644
--- a/halo2_proofs/src/plonk.rs
+++ b/halo2_proofs/src/plonk.rs
@@ -154,6 +154,11 @@ impl<C: CurveAffine> VerifyingKey<C> {
     pub fn get_domain(&self) -> &EvaluationDomain<C::Scalar> {
         &self.domain
     }
+
+    /// Get the underlying [`ConstraintSystem`].
+    pub fn get_cs(&self) -> &ConstraintSystem<C::Scalar> {
+        &self.cs
+    }
 }
 
 #[derive(Clone, Copy, Debug)]
diff --git a/halo2_proofs/src/poly/domain.rs b/halo2_proofs/src/poly/domain.rs
index 5204ebef26..cafa9b5cf4 100644
--- a/halo2_proofs/src/poly/domain.rs
+++ b/halo2_proofs/src/poly/domain.rs
@@ -141,6 +141,33 @@ impl<G: Group> EvaluationDomain<G> {
         }
     }
 
+    /// Generate a fake domain for cost estimation: `n`, `k`, `extended_k` are sized from `j` and `k`; every field element is `rand_scalar`.
+    pub fn fake_new(j: u32, k: u32, rand_scalar: G::Scalar) -> Self {
+        let n = 1u64 << k; // u64, not the inferred i32: n * (j - 1) reaches 2^31 for large k and would overflow
+        let quotient_poly_degree = (j - 1) as u64;
+        let mut extended_k = k;
+        while (1u64 << extended_k) < (n * quotient_poly_degree) {
+            extended_k += 1;
+        }
+
+        EvaluationDomain {
+            n: n as u64,
+            k: k as u32,
+            extended_k: extended_k as u32,
+            omega: rand_scalar.clone(),
+            omega_inv: rand_scalar.clone(),
+            extended_omega: rand_scalar.clone(),
+            extended_omega_inv: rand_scalar.clone(),
+            g_coset: rand_scalar,
+            g_coset_inv: rand_scalar.clone(),
+            quotient_poly_degree: quotient_poly_degree as u64,
+            ifft_divisor: rand_scalar.clone(),
+            extended_ifft_divisor: rand_scalar.clone(),
+            t_evaluations: (1..(1 << (extended_k - k))).map(|_| rand_scalar.clone()).collect(),
+            barycentric_weight: rand_scalar.clone(),
+        }
+    }
+
     /// Obtains a polynomial in Lagrange form when given a vector of Lagrange
     /// coefficients of size `n`; panics if the provided vector is the wrong
     /// length.

From eae3a58e6eff48ba9ff3a39790f83f74d51fbe29 Mon Sep 17 00:00:00 2001
From: spherel <sph6r6.l1u@gmail.com>
Date: Tue, 5 Jul 2022 22:13:02 +0000
Subject: [PATCH 2/9] Add cost model for prover time and memory usage

---
 .../examples/simple-example-cost-model.rs        |  6 ++----
 halo2_proofs/src/dev/cost_model.rs               | 16 ++++++++--------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/halo2_proofs/examples/simple-example-cost-model.rs b/halo2_proofs/examples/simple-example-cost-model.rs
index 14fae89e05..2b7b213a97 100644
--- a/halo2_proofs/examples/simple-example-cost-model.rs
+++ b/halo2_proofs/examples/simple-example-cost-model.rs
@@ -2,11 +2,9 @@ use halo2_proofs::{
     arithmetic::FieldExt,
     circuit::{Cell, Layouter, SimpleFloorPlanner},
     plonk::*,
-    poly::{commitment::Params, commitment::ParamsVerifier, Rotation},
-    transcript::{Blake2bRead, Blake2bWrite, Challenge255}, cost_model_main,
+    poly::Rotation, cost_model_main,
 };
-use pairing::bn256::{Bn256, Fr as Fp, G1Affine};
-use rand_core::OsRng;
+use pairing::bn256::{Bn256, Fr as Fp};
 
 use std::marker::PhantomData;
 
diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs
index e9d2cb0399..ae6d4d37cc 100644
--- a/halo2_proofs/src/dev/cost_model.rs
+++ b/halo2_proofs/src/dev/cost_model.rs
@@ -111,7 +111,7 @@ pub fn estimate<
         println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
         println!("num_extended_fft = {}, time_extended_fft = {}", num_extended_fft, time_extended_fft);
         println!("num_msm = {}, time_msm = {}", num_msm, time_msm);
-        println!("num_btree = {}, time_btree = {}", num_fft, time_fft);
+        println!("num_btree = {}, time_btree = {}", num_btree, time_btree);
         
         let pt_non_linear = (num_fft as f64) * time_fft +
                              (num_extended_fft as f64) * time_extended_fft +
@@ -199,7 +199,7 @@ pub fn simulate_circuit<
     );
 
     // NOTE(liutainyi): output prover_time
-    println!("{}\n{}", k, prover_time);
+    // println!("{}\n{}", k, prover_time);
 
     let proof = transcript.finalize();
 
@@ -330,12 +330,12 @@ macro_rules! cost_model_main {
             if mode.eq(&String::from("simulate")) {
                 simulate_circuit::<Bn256, _>(circuit, k);
             } else if mode.eq(&String::from("estimate")) {
-                // let res_path_1 = std::env::args().nth(3).expect("no circuit size given").parse().unwrap();
-                // let res_path_2 = std::env::args().nth(4).expect("no circuit size given").parse().unwrap();
-                // let res_1 = SimLinearResult::read(res_path_1);
-                // let res_2 = SimLinearResult::read(res_path_2);
-                let res_1 = SimLinearResult::new(10, 6292.0);
-                let res_2 = SimLinearResult::new(14, 50092.0);
+                let k1 = std::env::args().nth(3).expect("no k1 given").parse().unwrap();
+                let mem1: u64 = std::env::args().nth(4).expect("no mem1 given").parse().unwrap();
+                let k2 = std::env::args().nth(5).expect("no k2 given").parse().unwrap();
+                let mem2: u64 = std::env::args().nth(6).expect("no mem2 given").parse().unwrap();
+                let res_1 = SimLinearResult::new(k1, mem1 as f64);
+                let res_2 = SimLinearResult::new(k2, mem2 as f64);
                 let res = estimate::<Bn256, _>(circuit, res_1, res_2, k);
                 res.print();
             } else {

From 82635781950da28b3b8f6b6cdd584518ec291921 Mon Sep 17 00:00:00 2001
From: spherel <sph6r6.l1u@gmail.com>
Date: Wed, 6 Jul 2022 00:57:33 +0000
Subject: [PATCH 3/9] Add cost model.

---
 halo2_proofs/src/dev/cost_model.rs | 425 +++++++++++++++++++----------
 halo2_proofs/src/plonk.rs          |   8 +-
 2 files changed, 284 insertions(+), 149 deletions(-)

diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs
index ae6d4d37cc..21df4f509a 100644
--- a/halo2_proofs/src/dev/cost_model.rs
+++ b/halo2_proofs/src/dev/cost_model.rs
@@ -1,21 +1,24 @@
 //! Circuit cost model.
-//! usage: ${binary_file} estimate ${k}
 use std::{
-    time::Instant, io, fs, collections::BTreeMap,
+    time::Instant, io, fs, collections::BTreeMap, mem,
 };
 
 use crate::{
-    arithmetic::{Field, CurveAffine, Engine},
+    arithmetic::{Field, CurveAffine, Engine, eval_polynomial},
     circuit::{Cell, Layouter, SimpleFloorPlanner},
     plonk::*,
     poly::{commitment::Params, commitment::ParamsVerifier, EvaluationDomain, Rotation},
-    transcript::{Blake2bRead, Blake2bWrite, Challenge255},
+    transcript::{Blake2bRead, Blake2bWrite, Challenge255}, multicore,
 };
 use group::{prime::PrimeCurveAffine, GroupEncoding};
 use pairing::bn256::{Bn256, Fr as Fp, G1Affine};
 use rand_core::OsRng;
+use rayon::current_num_threads;
 
-fn measure_elapsed_time<T, F: FnOnce() -> T>(f: F) -> (f64, T) {
+use super::CircuitCost;
+
+/// measure the elapsed time.
+pub fn measure_elapsed_time<T, F: FnOnce() -> T>(f: F) -> (f64, T) {
     let start = Instant::now();
     let res = f();
     (start.elapsed().as_secs_f64(), res)
@@ -26,7 +29,6 @@ fn measure_elapsed_time<T, F: FnOnce() -> T>(f: F) -> (f64, T) {
 pub struct EstimateResult {
     prover_time: f64,
     mem_usage: f64,
-    aggregate_circuit_size: usize,
 }
 
 impl EstimateResult {
@@ -38,20 +40,139 @@ impl EstimateResult {
     }
 }
 
+impl Calculation {
+    fn fake_evaluate<F: Field>(
+        &self,
+    ) -> usize {
+        match self {
+            Calculation::Add(_, _) => 0,
+            Calculation::Sub(_, _) => 0, 
+            Calculation::Mul(_, _) => 1,
+            Calculation::Negate(_) => 0,
+            Calculation::LcBeta(_, _) => 1,
+            Calculation::LcTheta(_, _) => 1,
+            Calculation::AddGamma(_) => 0,
+            Calculation::Store(_) => 0,
+        }
+    }
+}
+
+impl<C: CurveAffine> Evaluator<C> {
+    fn fake_evaluate_h(
+        &self,
+        pk: &ProvingKey<C>,
+        l: usize,
+    ) -> usize {
+        let cs = pk.get_vk().get_cs();
+        let mut num_mul = 0;
+        // All calculations, with cached intermediate results
+        for calc in self.calculations.iter() {
+            let tmp_num_mul = calc.calculation.fake_evaluate::<C::Scalar>();
+            num_mul += tmp_num_mul;
+        }
+
+        // Accumulate value parts
+        num_mul += self.value_parts.len();
+
+        for table_result in self.lookup_results.iter() {
+            let tmp_num_mul = table_result.fake_evaluate::<C::Scalar>();
+            num_mul += tmp_num_mul;
+        }
+
+        // Permutations
+        let chunk_len = cs.degree() - 2;
+        let num_perm_slices = (cs.permutation.get_columns().len() + chunk_len - 1) / chunk_len;
+
+        // Enforce only for the first set.
+        // l_0(X) * (1 - z_0(X)) = 0
+        num_mul += 2;
+
+        // Enforce only for the last set.
+        // l_last(X) * (z_l(X)^2 - z_l(X)) = 0
+        num_mul += 3;
+
+        // Except for the first set, enforce.
+        // l_0(X) * (z_i(X) - z_{i-1}(\omega^(last) X)) = 0
+        if num_perm_slices > 0 {
+            num_mul += 2 * (num_perm_slices - 1);
+        }
+            
+        // delta_start * beta_start
+        num_mul += 1;
+        // And for all the sets we enforce:
+        // (1 - (l_last(X) + l_blind(X))) * (
+        //   z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
+        // - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma)
+        // )
+        num_mul = {
+            // Calculate left = z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
+            let mut tmp_num_mul = 0;
+            tmp_num_mul += 2 * chunk_len;
+            // Calculate right = z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma), current_delta *= DELTA
+            tmp_num_mul += chunk_len;
+            tmp_num_mul += chunk_len;
+            // Merge (1 - (l_last(X) + l_blind(X))) * (
+            //   z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
+            // - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma)
+            // ) into h.
+            tmp_num_mul += 2;
+            num_mul + tmp_num_mul * num_perm_slices
+        };
+        // beta_term *= &extended_omega;
+        if num_perm_slices > 0usize {
+            num_mul += 1;
+        }
+
+        // Lookups
+        // l_0(X) * (1 - z(X)) = 0, 2 add, 2 mul
+        // l_last(X) * (z(X)^2 - z(X)) = 0, 2 add, 3 mul
+        // (1 - (l_last(X) + l_blind(X))) * (
+        //   z(\omega X) (a'(X) + \beta) (s'(X) + \gamma)
+        //   - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta)
+        //          (\theta^{m-1} s_0(X) + ... + s_{m-1}(X) + \gamma)
+        // ) = 0,  4 add, 5 mul
+        // l_0(X) * (a'(X) - s'(X)) = 0, 1 add, 2 mul
+        // (1 - (l_last + l_blind)) * (a′(X) − s′(X))⋅(a′(X) − a′(\omega^{-1} X)) = 0, 2 add, 3 mul
+        let num_lookups = pk.get_vk().get_cs().lookups.len();
+        // l_0(X) * (1 - z(X)) = 0
+        num_mul += 2 * num_lookups;
+        // l_last(X) * (z(X)^2 - z(X)) = 0
+        num_mul += 3 * num_lookups;
+        // (1 - (l_last(X) + l_blind(X))) * (
+        //   z(\omega X) (a'(X) + \beta) (s'(X) + \gamma)
+        //   - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta)
+        //          (\theta^{m-1} s_0(X) + ... + s_{m-1}(X) + \gamma)
+        // ) = 0
+        num_mul += 5 * num_lookups;
+        // l_0(X) * (a'(X) - s'(X)) = 0
+        num_mul += 2 * num_lookups;
+        // (1 - (l_last + l_blind)) * (a′(X) − s′(X))⋅(a′(X) − a′(\omega^{-1} X)) = 0
+        num_mul += 3 * num_lookups;
+
+        num_mul *= l;
+
+        num_mul
+    }
+}
+
 /// estimate is to estimate the prover time, peek memory usage and aggregate circuit size.
 pub fn estimate<
     E: Engine,
     ConcreteCircuit: Circuit<E::Scalar>,
 >(
-    circuit: ConcreteCircuit, 
-    res_1: SimLinearResult,
-    res_2: SimLinearResult,
+    circuit: ConcreteCircuit,
     k: usize,
 ) -> EstimateResult {
+    // Generate small vk & pk
+    let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(15 as u32);
+    let vk = keygen_vk(&params, &circuit).expect("keygen_vk should not fail");
+    let pk = keygen_pk(&params, vk, &circuit).expect("keygen_pk should not fail");
+
+    let l = 1;
+
     // NOTE(sphere): init params
     // Initialize the polynomial commitment parameters
-    let mut cs = ConstraintSystem::default();
-    let config = ConcreteCircuit::configure(&mut cs);
+    let cs = pk.get_vk().get_cs();
 
     let generate_fake_params = |k| {
         let s = E::Scalar::random(OsRng);
@@ -68,110 +189,107 @@ pub fn estimate<
         }
     };
 
-    let estimate_pt_non_linear = |k| {
-
-        let params = generate_fake_params(k);
-        
-        // Initialize the domain
-        let domain = EvaluationDomain::fake_new(cs.degree() as u32, params.k, E::Scalar::random(OsRng));
-        
-        // NOTE(sphere): count function call
-        let FuncCount { num_fft, num_extended_fft, num_msm, num_btree } = dummy_proof::<E::G1Affine, ConcreteCircuit>(
-            &params,
-            &cs,
-            &domain,
-        );
-        
-        let n = 1 << k as usize;
-        let rand_vec: Vec::<E::Scalar> = (0..n).map(|_| E::Scalar::random(&mut OsRng)).collect();
-        let rand_vec2 = rand_vec.clone();
-        let rand_values = domain.lagrange_from_vec(rand_vec);
-
-        // NOTE(sphere): estimate opr time
-        //      msm
-        let (time_msm, _) = measure_elapsed_time(|| params.commit_lagrange(&rand_values));
-        //      fft
-        let (time_fft, rand_poly) = measure_elapsed_time(|| domain.lagrange_to_coeff(rand_values));
-        //      extended fft
-        let (time_extended_fft, _) = measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
-        //      BTree time cost in lookup argument
-        let (time_btree, _) = measure_elapsed_time(|| {
-            let mut leftover_table_map: BTreeMap<E::Scalar, u32> = rand_vec2
-            .iter().take(n)
-            .fold(BTreeMap::new(), |mut acc, coeff| {
-                *acc.entry(*coeff).or_insert(0) += 1;
-                acc
-            });
-            for item in rand_vec2 {
-                if let Some(count) = leftover_table_map.get_mut(&item) {
-                    *count -= 1;
-                }
+    let params = generate_fake_params(k);
+    
+    // Initialize the domain
+    let domain = EvaluationDomain::fake_new(cs.degree() as u32, params.k, E::Scalar::random(OsRng));
+    
+    let n = 1 << k as usize;
+    let rand_ele = E::Scalar::random(&mut OsRng);
+    let rand_vec: Vec::<E::Scalar> = (0..n).map(|_| rand_ele.clone()).collect();
+    let rand_vec2 = rand_vec.clone();
+    let rand_values = domain.lagrange_from_vec(rand_vec);
+
+    // NOTE(sphere): estimate op time
+    //      msm
+    let (time_msm, _) = measure_elapsed_time(|| params.commit_lagrange(&rand_values));
+    //      fft
+    let (time_fft, rand_poly) = measure_elapsed_time(|| domain.lagrange_to_coeff(rand_values));
+    //      extended fft
+    let (time_extended_fft, _) = measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
+    //      BTree time cost in lookup argument
+    let (time_btree, _) = measure_elapsed_time(|| {
+        let mut leftover_table_map: BTreeMap<E::Scalar, u32> = rand_vec2
+        .iter().take(n)
+        .fold(BTreeMap::new(), |mut acc, coeff| {
+            *acc.entry(*coeff).or_insert(0) += 1;
+            acc
+        });
+        for item in &rand_vec2 {
+            if let Some(count) = leftover_table_map.get_mut(item) {
+                *count -= 1;
+            }
+        }
+    });
+
+    let num_threads = multicore::current_num_threads();
+
+    // NOTE(sphere): estimate op count
+    let FuncCount { num_fft, num_extended_fft, num_msm, num_btree, num_mul, mem_usage} = dummy_proof(
+        &params,
+        &pk,
+        &domain,
+        l,
+    );
+
+    let estimate_add_mul_field_op_time = || {
+        let m = (domain.extended_len() + num_threads - 1) / num_threads;
+        let a = rand_ele.clone();
+        let mut b = rand_ele.clone();
+        //      m mul field ops
+        let (time_mul, _) = measure_elapsed_time(|| {
+            for _ in 0..m {
+                b = a * b;
             }
+            b
         });
-        println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
-        println!("num_extended_fft = {}, time_extended_fft = {}", num_extended_fft, time_extended_fft);
-        println!("num_msm = {}, time_msm = {}", num_msm, time_msm);
-        println!("num_btree = {}, time_btree = {}", num_btree, time_btree);
-        
-        let pt_non_linear = (num_fft as f64) * time_fft +
-                             (num_extended_fft as f64) * time_extended_fft +
-                             (num_msm as f64) * time_msm +
-                             (num_btree as f64) * time_btree;
-        println!("pt_non_linear = {}", pt_non_linear);
-        println!("");
-        pt_non_linear
+        println!("num_mul = {}, time_mul = {}", num_mul, time_mul);
+        (num_mul as f64) * time_mul
     };
 
-    let prover_time = estimate_pt_non_linear(k);
+    println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
+    println!("num_extended_fft = {}, time_extended_fft = {}", num_extended_fft, time_extended_fft);
+    println!("num_msm = {}, time_msm = {}", num_msm, time_msm);
+    println!("num_btree = {}, time_btree = {}", num_btree, time_btree);
+    
+
+    let pt_non_linear = (num_fft as f64) * time_fft +
+                            (num_extended_fft as f64) * time_extended_fft +
+                            (num_msm as f64) * time_msm +
+                            (num_btree as f64) * time_btree;
+    println!("pt_non_linear = {}", pt_non_linear);
 
-    let calc_linear_term = |x_1: f64, y_1: f64, x_2: f64, y_2: f64, x_3 :f64| {
-        y_1 + (y_2 - y_1) / (x_2 - x_1) * (x_3 - x_1)
-    };
+    let pt_linear = estimate_add_mul_field_op_time();
+    println!("pt_linear = {}", pt_linear);
 
-    let mem_usage = calc_linear_term(
-        (1 << res_1.k) as f64, res_1.mem_usage,
-        (1 << res_2.k) as f64, res_2.mem_usage,
-        (1 << k) as f64,
-    );
+    let (pt_random, _) = measure_elapsed_time(|| {
+        let mut random_poly = domain.empty_coeff();
+        for coeff in random_poly.iter_mut() {
+            *coeff = E::Scalar::random(&mut OsRng);
+        }
+        random_poly
+    });
+    println!("pt_random = {}", pt_random);
+    println!("");
+
+    let prover_time = pt_non_linear + pt_linear + pt_random;
+
+    // let calc_linear_term = |x_1: f64, y_1: f64, x_2: f64, y_2: f64, x_3 :f64| {
+    //     y_1 + (y_2 - y_1) / (x_2 - x_1) * (x_3 - x_1)
+    // };
+
+    // let mem_usage2 = calc_linear_term(
+    //     (1 << res_1.k) as f64, res_1.mem_usage,
+    //     (1 << res_2.k) as f64, res_2.mem_usage,
+    //     (1 << k) as f64,
+    // );
+    // println!("mem_usage by linear regression = {}", mem_usage2);
 
     // NOTE(sphere): calculate aggregate_circuit_size
     
     EstimateResult {
         prover_time,
-        mem_usage,
-        aggregate_circuit_size: 0,
-    }
-}
-
-/// SimLinearResult is to store the result of simulate.
-#[derive(Debug)]
-pub struct SimLinearResult {
-    k: usize,
-    // prover_time: f64,
-    mem_usage: f64,
-}
-
-impl SimLinearResult {
-    /// read is to read SimLinearResult from a file.
-    pub fn read(filepath: String) -> SimLinearResult {
-        let data = fs::read_to_string(filepath).expect("read failed");
-        let mut data = data.split_whitespace();
-        let k = data.next().unwrap().parse().expect("k parse failed");
-        // let prover_time = data.next().unwrap().parse().expect("prover_time (s) parse failed");
-        let mem_usage = data.next().unwrap().parse().expect("mem_usage (KB) parse in failed");
-        SimLinearResult {
-            k,
-            // prover_time,
-            mem_usage,
-        }
-    }
-
-    /// create new `SimLinearResult`.
-    pub fn new(k: usize, mem_usage: f64) -> Self {
-        SimLinearResult {
-            k,
-            mem_usage,
-        }
+        mem_usage: (mem_usage as f64) / 1024.0, // to KB
     }
 }
 
@@ -184,7 +302,6 @@ pub fn simulate_circuit<
 
     // Initialize the polynomial commitment parameters
     let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(k as u32);
-    // let params_verifier: ParamsVerifier<E> = params.verifier(public_inputs_size).unwrap();
 
     // Initialize the proving key
     let vk = keygen_vk(&params, &circuit).expect("keygen_vk should not fail");
@@ -199,21 +316,7 @@ pub fn simulate_circuit<
     );
 
     // NOTE(liutainyi): output prover_time
-    // println!("{}\n{}", k, prover_time);
-
-    let proof = transcript.finalize();
-
-    // let strategy = SingleVerifier::new(&params_verifier);
-    // let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-
-    // verify_proof(
-    //     &params_verifier,
-    //     pk.get_vk(),
-    //     strategy,
-    //     &[&[]],
-    //     &mut transcript,
-    // )
-    // .unwrap();
+    println!("k = {}, prover_time = {}", k, prover_time);
 }
 
 struct FuncCount {
@@ -221,19 +324,23 @@ struct FuncCount {
     num_extended_fft: usize,
     num_msm: usize,
     num_btree: usize,
+    num_mul: usize,
+    mem_usage: usize,
 }
 
-fn dummy_proof<C: CurveAffine, ConcreteCircuit: Circuit<C::Scalar>>(
+fn dummy_proof<C: CurveAffine>(
     params: &Params<C>,
-    cs: &ConstraintSystem<C::Scalar>,
+    pk: &ProvingKey<C>,
     domain: &EvaluationDomain<C::Scalar>,
+    l: usize, // The number of input.
 ) -> FuncCount {
-    let l = 1;
     let mut num_fft = 0 as usize;
     let mut num_extended_fft = 0 as usize;
     let mut num_msm = 0 as usize;
     let mut num_btree = 0 as usize;
 
+    let cs = pk.get_vk().get_cs();
+
     // (instance, advice) calculate (poly, coset, commitment)
 
     // NOTE(sphere): ins_commit, pt += l * n_ins * commit_lagrange_t
@@ -257,7 +364,7 @@ fn dummy_proof<C: CurveAffine, ConcreteCircuit: Circuit<C::Scalar>>(
     // Commit to permutations.
     // NOTE(sphere): l * perm_commit_t
     //      commit_lagrange: z
-    let num_perm_slices = (cs.permutation.get_columns().len() + (cs.degree() - 1)) / (cs.degree() - 2);
+    let num_perm_slices = (cs.permutation.get_columns().len() + (cs.degree() - 3)) / (cs.degree() - 2);
     num_msm += num_perm_slices;
     //      lagrange_to_coeff: z
     num_fft += num_perm_slices;
@@ -265,10 +372,10 @@ fn dummy_proof<C: CurveAffine, ConcreteCircuit: Circuit<C::Scalar>>(
     num_extended_fft += num_perm_slices;
     
     // NOTE(sphere): pt += lookup_commit_product
-    //      commit_lagrange: z
-    num_msm += num_lookups;
-    //      lagrange_to_coeff: z
-    num_fft += num_lookups;
+    //      commit_lagrange: z, a', s'
+    num_msm += 3 * num_lookups;
+    //      lagrange_to_coeff: z, a', s'
+    num_fft += 3 * num_lookups;
 
     // Commit to the vanishing argument's random polynomial for blinding h(x_3)
     // NOTE(sphere): vanishing_commit
@@ -277,7 +384,7 @@ fn dummy_proof<C: CurveAffine, ConcreteCircuit: Circuit<C::Scalar>>(
 
     // Evaluate the h(X) polynomial
     // NOTE(sphere): evaluate_h 3 coeff_to_extended for each lookup argument
-    num_extended_fft += 3 * num_lookups;
+    num_extended_fft += l * 3 * num_lookups;
 
     // Construct the vanishing argument's h(X) commitments
     // NOTE(sphere): pt += vanishing_construct
@@ -287,26 +394,55 @@ fn dummy_proof<C: CurveAffine, ConcreteCircuit: Circuit<C::Scalar>>(
     let num_h_pieces = ((domain.extended_len() as u64 + params.n - 1) / params.n) as usize;
     num_msm += num_h_pieces;
 
-    // NOTE(sphere): evaluating ins / adv / fix only contains linear_terms.
-
-    // NOTE(sphere): vanishing_evaluate only contains linear_terms.
-
-    // NOTE(sphere): permutation_evaluate only contains linear_terms.
-
-    // NOTE(sphere): permutation_construct_evaluate only contains linear_terms.
-
-    // NOTE(sphere): lookups_evaluate only contains linear_terms.
-
-    // NOTE(sphere): sum up number of queries.
+    // NOTE(sphere): evaluate h.
+    let num_mul = pk.get_ev().fake_evaluate_h(&pk, l);
 
     // NOTE(sphere): multiopen(shplonk).
     //      commit: h_x, h_x
+    //      The evaluations in multiopen is too small.
     num_msm += 2;
+
+    // TODO(sphere): Memory 
+    let mut mem_usage = 0 as usize;
+    //      instance / advice / fixed as value poly, and coset:
+    let n = 1 << params.k as usize;
+    let ext_n = domain.extended_len();
+    mem_usage += l * (cs.num_instance_columns + cs.num_advice_columns) * (ext_n + 2 * n);
+    mem_usage += cs.num_fixed_columns * (2 * n + ext_n);
+    //      l_0, l_last, l_active_row as coset:
+    mem_usage += 3 * ext_n;
+    //      lookup compressed_input / compressed_table as value:
+    // mem_usage += 2 * l * num_lookups * n;
+    //      lookup permuted_input / permuted_table as value:
+    // mem_usage += 2 * l * num_lookups * n;
+    //      lookup permuted_input / permuted_table as poly:
+    mem_usage += 2 * l * num_lookups * n;
+    //      lookup Z as poly
+    mem_usage += l * num_lookups * n;
+    //      permutation sigma as value, poly, and coset:
+    mem_usage += l * num_perm_slices * (2 * n + ext_n);
+    //      permutation Z as poly,, and coset
+    mem_usage += l * num_perm_slices * (n + ext_n);
+    //      vanishing random_poly
+    mem_usage += n;
+    //      evaluate_h lookup values
+    mem_usage += num_lookups * ext_n;
+    //      evaluate_h single lookup Z / permuted_input / permuted_table as coset
+    mem_usage += l * 3 * ext_n;
+    //      evaluate_h h_poly as coset
+    mem_usage += ext_n;
+
+    println!("number of field element: {}", mem_usage);
+
+    mem_usage *= mem::size_of::<C::Scalar>();
+
     FuncCount {
         num_fft, 
         num_extended_fft,
         num_msm,
         num_btree,
+        num_mul,
+        mem_usage,
     }
 }
 
@@ -317,7 +453,6 @@ macro_rules! cost_model_main {
     ($cir:expr) => {
         use halo2_proofs::dev::{
             simulate_circuit,
-            SimLinearResult,
             estimate,
         };
 
@@ -330,13 +465,7 @@ macro_rules! cost_model_main {
             if mode.eq(&String::from("simulate")) {
                 simulate_circuit::<Bn256, _>(circuit, k);
             } else if mode.eq(&String::from("estimate")) {
-                let k1 = std::env::args().nth(3).expect("no k1 given").parse().unwrap();
-                let mem1: u64 = std::env::args().nth(4).expect("no mem1 given").parse().unwrap();
-                let k2 = std::env::args().nth(5).expect("no k2 given").parse().unwrap();
-                let mem2: u64 = std::env::args().nth(6).expect("no mem2 given").parse().unwrap();
-                let res_1 = SimLinearResult::new(k1, mem1 as f64);
-                let res_2 = SimLinearResult::new(k2, mem2 as f64);
-                let res = estimate::<Bn256, _>(circuit, res_1, res_2, k);
+                let res = estimate::<Bn256, _>(circuit, k);
                 res.print();
             } else {
                 panic!("unrecognized format");
diff --git a/halo2_proofs/src/plonk.rs b/halo2_proofs/src/plonk.rs
index 971dc10d09..fc4020c3c3 100644
--- a/halo2_proofs/src/plonk.rs
+++ b/halo2_proofs/src/plonk.rs
@@ -33,10 +33,11 @@ pub use error::*;
 pub use keygen::*;
 pub use prover::*;
 pub use verifier::*;
+pub use evaluation::*;
 
 use std::io;
 
-use self::evaluation::Evaluator;
+pub use self::evaluation::Evaluator;
 
 /// This is a verifying key which allows for the verification of proofs for a
 /// particular circuit.
@@ -147,6 +148,11 @@ impl<C: CurveAffine> ProvingKey<C> {
     pub fn get_vk(&self) -> &VerifyingKey<C> {
         &self.vk
     }
+    
+    /// Get the underlying [`Evaluator`].
+    pub fn get_ev(&self) -> &Evaluator<C> {
+        &self.ev
+    }
 }
 
 impl<C: CurveAffine> VerifyingKey<C> {

From 07b954ffca7c8a2f7c804e5e086a1b13d2204dcf Mon Sep 17 00:00:00 2001
From: spherel <sph6r6.l1u@gmail.com>
Date: Mon, 8 Aug 2022 19:01:36 -0700
Subject: [PATCH 4/9] fix

---
 .gitignore                                    |   2 -
 .../examples/simple-example-cost-model.rs     |   8 +-
 halo2_proofs/src/dev/cost_model.rs            | 145 +++++++++---------
 halo2_proofs/src/plonk.rs                     |   4 +-
 halo2_proofs/src/poly/domain.rs               |  18 +--
 5 files changed, 88 insertions(+), 89 deletions(-)

diff --git a/.gitignore b/.gitignore
index 75d96298ed..1bd93e286e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,5 +3,3 @@
 Cargo.lock
 .vscode
 **/*.html
-output/
-run-cost-model.sh
diff --git a/halo2_proofs/examples/simple-example-cost-model.rs b/halo2_proofs/examples/simple-example-cost-model.rs
index 2b7b213a97..76e14c8f67 100644
--- a/halo2_proofs/examples/simple-example-cost-model.rs
+++ b/halo2_proofs/examples/simple-example-cost-model.rs
@@ -1,8 +1,9 @@
 use halo2_proofs::{
     arithmetic::FieldExt,
     circuit::{Cell, Layouter, SimpleFloorPlanner},
+    cost_model_main,
     plonk::*,
-    poly::Rotation, cost_model_main,
+    poly::Rotation,
 };
 use pairing::bn256::{Bn256, Fr as Fp};
 
@@ -236,4 +237,7 @@ impl<F: FieldExt> Circuit<F> for MyCircuit<F> {
     }
 }
 
-cost_model_main!(MyCircuit::<Fp>{a: Some(Fp::from(5)), k: 8});
+cost_model_main!(MyCircuit::<Fp> {
+    a: Some(Fp::from(5)),
+    k: 8
+});
diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs
index 21df4f509a..3c5b7ff05b 100644
--- a/halo2_proofs/src/dev/cost_model.rs
+++ b/halo2_proofs/src/dev/cost_model.rs
@@ -1,14 +1,13 @@
 //! Circuit cost model.
-use std::{
-    time::Instant, io, fs, collections::BTreeMap, mem,
-};
+use std::{collections::BTreeMap, fs, io, mem, time::Instant};
 
 use crate::{
-    arithmetic::{Field, CurveAffine, Engine, eval_polynomial},
+    arithmetic::{eval_polynomial, CurveAffine, Engine, Field},
     circuit::{Cell, Layouter, SimpleFloorPlanner},
+    multicore,
     plonk::*,
     poly::{commitment::Params, commitment::ParamsVerifier, EvaluationDomain, Rotation},
-    transcript::{Blake2bRead, Blake2bWrite, Challenge255}, multicore,
+    transcript::{Blake2bRead, Blake2bWrite, Challenge255},
 };
 use group::{prime::PrimeCurveAffine, GroupEncoding};
 use pairing::bn256::{Bn256, Fr as Fp, G1Affine};
@@ -41,12 +40,10 @@ impl EstimateResult {
 }
 
 impl Calculation {
-    fn fake_evaluate<F: Field>(
-        &self,
-    ) -> usize {
+    fn fake_evaluate<F: Field>(&self) -> usize {
         match self {
             Calculation::Add(_, _) => 0,
-            Calculation::Sub(_, _) => 0, 
+            Calculation::Sub(_, _) => 0,
             Calculation::Mul(_, _) => 1,
             Calculation::Negate(_) => 0,
             Calculation::LcBeta(_, _) => 1,
@@ -58,11 +55,7 @@ impl Calculation {
 }
 
 impl<C: CurveAffine> Evaluator<C> {
-    fn fake_evaluate_h(
-        &self,
-        pk: &ProvingKey<C>,
-        l: usize,
-    ) -> usize {
+    fn fake_evaluate_h(&self, pk: &ProvingKey<C>, l: usize) -> usize {
         let cs = pk.get_vk().get_cs();
         let mut num_mul = 0;
         // All calculations, with cached intermediate results
@@ -96,7 +89,7 @@ impl<C: CurveAffine> Evaluator<C> {
         if num_perm_slices > 0 {
             num_mul += 2 * (num_perm_slices - 1);
         }
-            
+
         // delta_start * beta_start
         num_mul += 1;
         // And for all the sets we enforce:
@@ -156,15 +149,12 @@ impl<C: CurveAffine> Evaluator<C> {
 }
 
 /// estimate is to estimate the prover time, peek memory usage and aggregate circuit size.
-pub fn estimate<
-    E: Engine,
-    ConcreteCircuit: Circuit<E::Scalar>,
->(
+pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     circuit: ConcreteCircuit,
     k: usize,
 ) -> EstimateResult {
     // Generate small vk & pk
-    let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(15 as u32);
+    let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(k as u32);
     let vk = keygen_vk(&params, &circuit).expect("keygen_vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("keygen_pk should not fail");
 
@@ -183,20 +173,20 @@ pub fn estimate<
         Params {
             k: k as u32,
             n: n as u64,
-            g: (0..n).map(|_| rand_c1.clone()).collect(),
-            g_lagrange: (0..n).map(|_| rand_c1.clone()).collect(),
+            g: (0..n).map(|_| rand_c1).collect(),
+            g_lagrange: (0..n).map(|_| rand_c1).collect(),
             additional_data: Vec::from(rand_c2.to_bytes().as_ref()),
         }
     };
 
     let params = generate_fake_params(k);
-    
+
     // Initialize the domain
     let domain = EvaluationDomain::fake_new(cs.degree() as u32, params.k, E::Scalar::random(OsRng));
-    
+
     let n = 1 << k as usize;
     let rand_ele = E::Scalar::random(&mut OsRng);
-    let rand_vec: Vec::<E::Scalar> = (0..n).map(|_| rand_ele.clone()).collect();
+    let rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele).collect();
     let rand_vec2 = rand_vec.clone();
     let rand_values = domain.lagrange_from_vec(rand_vec);
 
@@ -209,12 +199,14 @@ pub fn estimate<
     let (time_extended_fft, _) = measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
     //      BTree time cost in lookup argument
     let (time_btree, _) = measure_elapsed_time(|| {
-        let mut leftover_table_map: BTreeMap<E::Scalar, u32> = rand_vec2
-        .iter().take(n)
-        .fold(BTreeMap::new(), |mut acc, coeff| {
-            *acc.entry(*coeff).or_insert(0) += 1;
-            acc
-        });
+        let mut leftover_table_map: BTreeMap<E::Scalar, u32> =
+            rand_vec2
+                .iter()
+                .take(n)
+                .fold(BTreeMap::new(), |mut acc, coeff| {
+                    *acc.entry(*coeff).or_insert(0) += 1;
+                    acc
+                });
         for item in &rand_vec2 {
             if let Some(count) = leftover_table_map.get_mut(item) {
                 *count -= 1;
@@ -225,17 +217,19 @@ pub fn estimate<
     let num_threads = multicore::current_num_threads();
 
     // NOTE(sphere): estimate op count
-    let FuncCount { num_fft, num_extended_fft, num_msm, num_btree, num_mul, mem_usage} = dummy_proof(
-        &params,
-        &pk,
-        &domain,
-        l,
-    );
+    let FuncCount {
+        num_fft,
+        num_extended_fft,
+        num_msm,
+        num_btree,
+        num_mul,
+        mem_usage,
+    } = dummy_proof(&params, &pk, &domain, l);
 
     let estimate_add_mul_field_op_time = || {
         let m = (domain.extended_len() + num_threads - 1) / num_threads;
-        let a = rand_ele.clone();
-        let mut b = rand_ele.clone();
+        let a = rand_ele;
+        let mut b = rand_ele;
         //      m mul field ops
         let (time_mul, _) = measure_elapsed_time(|| {
             for _ in 0..m {
@@ -248,15 +242,17 @@ pub fn estimate<
     };
 
     println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
-    println!("num_extended_fft = {}, time_extended_fft = {}", num_extended_fft, time_extended_fft);
+    println!(
+        "num_extended_fft = {}, time_extended_fft = {}",
+        num_extended_fft, time_extended_fft
+    );
     println!("num_msm = {}, time_msm = {}", num_msm, time_msm);
     println!("num_btree = {}, time_btree = {}", num_btree, time_btree);
-    
 
-    let pt_non_linear = (num_fft as f64) * time_fft +
-                            (num_extended_fft as f64) * time_extended_fft +
-                            (num_msm as f64) * time_msm +
-                            (num_btree as f64) * time_btree;
+    let pt_non_linear = (num_fft as f64) * time_fft
+        + (num_extended_fft as f64) * time_extended_fft
+        + (num_msm as f64) * time_msm
+        + (num_btree as f64) * time_btree;
     println!("pt_non_linear = {}", pt_non_linear);
 
     let pt_linear = estimate_add_mul_field_op_time();
@@ -270,7 +266,7 @@ pub fn estimate<
         random_poly
     });
     println!("pt_random = {}", pt_random);
-    println!("");
+    println!();
 
     let prover_time = pt_non_linear + pt_linear + pt_random;
 
@@ -286,7 +282,7 @@ pub fn estimate<
     // println!("mem_usage by linear regression = {}", mem_usage2);
 
     // NOTE(sphere): calculate aggregate_circuit_size
-    
+
     EstimateResult {
         prover_time,
         mem_usage: (mem_usage as f64) / 1024.0, // to KB
@@ -294,10 +290,10 @@ pub fn estimate<
 }
 
 /// simulate_circuit is to run a circuit proving process.
-pub fn simulate_circuit<
-    E: Engine,
-    ConcreteCircuit: Circuit<E::Scalar>,
->(circuit: ConcreteCircuit, k: usize) {
+pub fn simulate_circuit<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
+    circuit: ConcreteCircuit,
+    k: usize,
+) {
     // let public_inputs_size = 0;
 
     // Initialize the polynomial commitment parameters
@@ -310,17 +306,17 @@ pub fn simulate_circuit<
     // Create a proof
     let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
 
-    let (prover_time, _) = measure_elapsed_time(|| 
+    let (prover_time, _) = measure_elapsed_time(|| {
         create_proof(&params, &pk, &[circuit], &[&[]], OsRng, &mut transcript)
-        .expect("proof generation should not fail")
-    );
+            .expect("proof generation should not fail")
+    });
 
     // NOTE(liutainyi): output prover_time
     println!("k = {}, prover_time = {}", k, prover_time);
 }
 
 struct FuncCount {
-    num_fft: usize, 
+    num_fft: usize,
     num_extended_fft: usize,
     num_msm: usize,
     num_btree: usize,
@@ -334,10 +330,10 @@ fn dummy_proof<C: CurveAffine>(
     domain: &EvaluationDomain<C::Scalar>,
     l: usize, // The number of input.
 ) -> FuncCount {
-    let mut num_fft = 0 as usize;
-    let mut num_extended_fft = 0 as usize;
-    let mut num_msm = 0 as usize;
-    let mut num_btree = 0 as usize;
+    let mut num_fft = 0_usize;
+    let mut num_extended_fft = 0_usize;
+    let mut num_msm = 0_usize;
+    let mut num_btree = 0_usize;
 
     let cs = pk.get_vk().get_cs();
 
@@ -364,13 +360,14 @@ fn dummy_proof<C: CurveAffine>(
     // Commit to permutations.
     // NOTE(sphere): l * perm_commit_t
     //      commit_lagrange: z
-    let num_perm_slices = (cs.permutation.get_columns().len() + (cs.degree() - 3)) / (cs.degree() - 2);
+    let num_perm_slices =
+        (cs.permutation.get_columns().len() + (cs.degree() - 3)) / (cs.degree() - 2);
     num_msm += num_perm_slices;
     //      lagrange_to_coeff: z
     num_fft += num_perm_slices;
     //      coeff_to_extended: z
     num_extended_fft += num_perm_slices;
-    
+
     // NOTE(sphere): pt += lookup_commit_product
     //      commit_lagrange: z, a', s'
     num_msm += 3 * num_lookups;
@@ -389,21 +386,21 @@ fn dummy_proof<C: CurveAffine>(
     // Construct the vanishing argument's h(X) commitments
     // NOTE(sphere): pt += vanishing_construct
     //      extended_to_coeff: h_poly
-    num_extended_fft +=  1;
+    num_extended_fft += 1;
     //      commit: h_poly_i
     let num_h_pieces = ((domain.extended_len() as u64 + params.n - 1) / params.n) as usize;
     num_msm += num_h_pieces;
 
     // NOTE(sphere): evaluate h.
-    let num_mul = pk.get_ev().fake_evaluate_h(&pk, l);
+    let num_mul = pk.get_ev().fake_evaluate_h(pk, l);
 
-    // NOTE(sphere): multiopen(shplonk).
+    // TODO(sphere): multiopen(shplonk). There should be a more detailed evaluation.
     //      commit: h_x, h_x
     //      The evaluations in multiopen is too small.
     num_msm += 2;
 
-    // TODO(sphere): Memory 
-    let mut mem_usage = 0 as usize;
+    // NOTE(sphere): Memory
+    let mut mem_usage = 0_usize;
     //      instance / advice / fixed as value poly, and coset:
     let n = 1 << params.k as usize;
     let ext_n = domain.extended_len();
@@ -437,7 +434,7 @@ fn dummy_proof<C: CurveAffine>(
     mem_usage *= mem::size_of::<C::Scalar>();
 
     FuncCount {
-        num_fft, 
+        num_fft,
         num_extended_fft,
         num_msm,
         num_btree,
@@ -446,20 +443,20 @@ fn dummy_proof<C: CurveAffine>(
     }
 }
 
-
 /// cost_model_main is to generate a main function to run the cost model for a circuit.
 #[macro_export]
 macro_rules! cost_model_main {
     ($cir:expr) => {
-        use halo2_proofs::dev::{
-            simulate_circuit,
-            estimate,
-        };
+        use halo2_proofs::dev::{estimate, simulate_circuit};
 
         fn main() {
             // NOTE(sphere): get k from args
             let mode = std::env::args().nth(1).expect("no running-mode given");
-            let k = std::env::args().nth(2).expect("no circuit size given").parse().unwrap();
+            let k = std::env::args()
+                .nth(2)
+                .expect("no circuit size given")
+                .parse()
+                .unwrap();
             // NOTE(sphere): estimate linear cost (cfg == simulate)
             let circuit = $cir;
             if mode.eq(&String::from("simulate")) {
@@ -471,5 +468,5 @@ macro_rules! cost_model_main {
                 panic!("unrecognized format");
             }
         }
-    }
+    };
 }
diff --git a/halo2_proofs/src/plonk.rs b/halo2_proofs/src/plonk.rs
index fc4020c3c3..35204077e5 100644
--- a/halo2_proofs/src/plonk.rs
+++ b/halo2_proofs/src/plonk.rs
@@ -30,10 +30,10 @@ mod verifier;
 pub use assigned::*;
 pub use circuit::*;
 pub use error::*;
+pub use evaluation::*;
 pub use keygen::*;
 pub use prover::*;
 pub use verifier::*;
-pub use evaluation::*;
 
 use std::io;
 
@@ -148,7 +148,7 @@ impl<C: CurveAffine> ProvingKey<C> {
     pub fn get_vk(&self) -> &VerifyingKey<C> {
         &self.vk
     }
-    
+
     /// Get the underlying [`Evaluator`].
     pub fn get_ev(&self) -> &Evaluator<C> {
         &self.ev
diff --git a/halo2_proofs/src/poly/domain.rs b/halo2_proofs/src/poly/domain.rs
index cafa9b5cf4..8d1b0a72e3 100644
--- a/halo2_proofs/src/poly/domain.rs
+++ b/halo2_proofs/src/poly/domain.rs
@@ -154,17 +154,17 @@ impl<G: Group> EvaluationDomain<G> {
             n: n as u64,
             k: k as u32,
             extended_k: extended_k as u32,
-            omega: rand_scalar.clone(),
-            omega_inv: rand_scalar.clone(),
-            extended_omega: rand_scalar.clone(),
-            extended_omega_inv: rand_scalar.clone(),
+            omega: rand_scalar,
+            omega_inv: rand_scalar,
+            extended_omega: rand_scalar,
+            extended_omega_inv: rand_scalar,
             g_coset: rand_scalar,
-            g_coset_inv: rand_scalar.clone(),
+            g_coset_inv: rand_scalar,
             quotient_poly_degree: quotient_poly_degree as u64,
-            ifft_divisor: rand_scalar.clone(),
-            extended_ifft_divisor: rand_scalar.clone(),
-            t_evaluations: (1..(1 << (extended_k - k))).map(|_| rand_scalar.clone()).collect(),
-            barycentric_weight: rand_scalar.clone(),
+            ifft_divisor: rand_scalar,
+            extended_ifft_divisor: rand_scalar,
+            t_evaluations: (1..(1 << (extended_k - k))).map(|_| rand_scalar).collect(),
+            barycentric_weight: rand_scalar,
         }
     }
 

From 7f034aafddcc6a1a67400989068d8678edcdd83c Mon Sep 17 00:00:00 2001
From: spherel <sph6r6.l1u@gmail.com>
Date: Sun, 11 Sep 2022 18:46:18 +0000
Subject: [PATCH 5/9] Reduce the running time and estimate the multiopen cost

---
 halo2_proofs/src/dev/cost_model.rs | 377 +++++++++++++++++++++--------
 halo2_proofs/src/poly/domain.rs    |  27 ---
 2 files changed, 273 insertions(+), 131 deletions(-)

diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs
index 3c5b7ff05b..c2abe127e0 100644
--- a/halo2_proofs/src/dev/cost_model.rs
+++ b/halo2_proofs/src/dev/cost_model.rs
@@ -1,8 +1,8 @@
 //! Circuit cost model.
-use std::{collections::BTreeMap, fs, io, mem, time::Instant};
+use std::{collections::BTreeMap, fs, io, iter, mem, time::Instant};
 
 use crate::{
-    arithmetic::{eval_polynomial, CurveAffine, Engine, Field},
+    arithmetic::{eval_polynomial, kate_division, CurveAffine, Engine, Field},
     circuit::{Cell, Layouter, SimpleFloorPlanner},
     multicore,
     plonk::*,
@@ -16,7 +16,7 @@ use rayon::current_num_threads;
 
 use super::CircuitCost;
 
-/// measure the elapsed time.
+/// Measures the elapsed time of a closure.
 pub fn measure_elapsed_time<T, F: FnOnce() -> T>(f: F) -> (f64, T) {
     let start = Instant::now();
     let res = f();
@@ -35,116 +35,132 @@ impl EstimateResult {
     pub fn print(&self) {
         println!("prover time = {} (s)", self.prover_time);
         println!("memory usage = {} (KB)", self.mem_usage);
-        // println!("aggregate circuit size = {}", aggregate_circuit_size);
     }
 }
 
 impl Calculation {
-    fn fake_evaluate<F: Field>(&self) -> usize {
+    // Returns the operation counts as `(additions, multiplications)`.
+    fn fake_evaluate<F: Field>(&self) -> (usize, usize) {
         match self {
-            Calculation::Add(_, _) => 0,
-            Calculation::Sub(_, _) => 0,
-            Calculation::Mul(_, _) => 1,
-            Calculation::Negate(_) => 0,
-            Calculation::LcBeta(_, _) => 1,
-            Calculation::LcTheta(_, _) => 1,
-            Calculation::AddGamma(_) => 0,
-            Calculation::Store(_) => 0,
+            Calculation::Add(_, _) => (1, 0),
+            Calculation::Sub(_, _) => (1, 0),
+            Calculation::Mul(_, _) => (0, 1),
+            Calculation::Negate(_) => (1, 0),
+            Calculation::LcBeta(_, _) => (1, 1),
+            Calculation::LcTheta(_, _) => (1, 1),
+            Calculation::AddGamma(_) => (1, 0),
+            Calculation::Store(_) => (0, 0),
         }
     }
 }
 
+struct FakeProverQuery {
+    rotation: Rotation,
+}
+
 impl<C: CurveAffine> Evaluator<C> {
-    fn fake_evaluate_h(&self, pk: &ProvingKey<C>, l: usize) -> usize {
+    // Returns the number of Hadamard (element-wise) additions and products as `(additions, multiplications)`.
+    fn fake_evaluate_h(&self, pk: &ProvingKey<C>, l: usize) -> (usize, usize) {
         let cs = pk.get_vk().get_cs();
-        let mut num_mul = 0;
+        let mut num_mul_lag = 0;
+        let mut num_add_lag = 0;
         // All calculations, with cached intermediate results
         for calc in self.calculations.iter() {
-            let tmp_num_mul = calc.calculation.fake_evaluate::<C::Scalar>();
-            num_mul += tmp_num_mul;
+            let (tmp_num_add_lag, tmp_num_mul_lag) = calc.calculation.fake_evaluate::<C::Scalar>();
+            num_add_lag += tmp_num_add_lag;
+            num_mul_lag += tmp_num_mul_lag;
         }
 
         // Accumulate value parts
-        num_mul += self.value_parts.len();
+        num_add_lag += self.value_parts.len();
+        num_mul_lag += self.value_parts.len();
 
         for table_result in self.lookup_results.iter() {
-            let tmp_num_mul = table_result.fake_evaluate::<C::Scalar>();
-            num_mul += tmp_num_mul;
+            let (tmp_num_add_lag, tmp_num_mul_lag) = table_result.fake_evaluate::<C::Scalar>();
+            num_add_lag += tmp_num_add_lag;
+            num_mul_lag += tmp_num_mul_lag;
         }
 
         // Permutations
         let chunk_len = cs.degree() - 2;
         let num_perm_slices = (cs.permutation.get_columns().len() + chunk_len - 1) / chunk_len;
 
-        // Enforce only for the first set.
-        // l_0(X) * (1 - z_0(X)) = 0
-        num_mul += 2;
-
-        // Enforce only for the last set.
-        // l_last(X) * (z_l(X)^2 - z_l(X)) = 0
-        num_mul += 3;
-
-        // Except for the first set, enforce.
-        // l_0(X) * (z_i(X) - z_{i-1}(\omega^(last) X)) = 0
         if num_perm_slices > 0 {
-            num_mul += 2 * (num_perm_slices - 1);
+            // Enforce only for the first set.
+            // value(X) = value(X) * y + l_0(X) * (1 - z_0(X))
+            num_add_lag += 2;
+            num_mul_lag += 2;
+            // Enforce only for the last set.
+            // value(X) = value(X) * y + l_last(X) * (z_l(X)^2 - z_l(X))
+            num_add_lag += 2;
+            num_mul_lag += 3;
+            // Except for the first set, enforce.
+            // value(X) = value(X) * y + l_0(X) * (z_i(X) - z_{i-1}(\omega^(last) X))
+            num_add_lag += 2 * (num_perm_slices - 1);
+            num_mul_lag += 2 * (num_perm_slices - 1);
+            // delta_start * beta_start
+            num_mul_lag += 1;
         }
 
-        // delta_start * beta_start
-        num_mul += 1;
         // And for all the sets we enforce:
         // (1 - (l_last(X) + l_blind(X))) * (
         //   z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
         // - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma)
         // )
-        num_mul = {
+        (num_add_lag, num_mul_lag) = {
             // Calculate left = z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
-            let mut tmp_num_mul = 0;
-            tmp_num_mul += 2 * chunk_len;
+            let mut tmp_num_add_lag = 0;
+            let mut tmp_num_mul_lag = 0;
+            tmp_num_add_lag += 2 * chunk_len;
+            tmp_num_mul_lag += 2 * chunk_len;
             // Calculate right = z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma), current_delta *= DELTA
-            tmp_num_mul += chunk_len;
-            tmp_num_mul += chunk_len;
-            // Merge (1 - (l_last(X) + l_blind(X))) * (
+            tmp_num_add_lag += 2 * chunk_len;
+            tmp_num_mul_lag += chunk_len;
+            tmp_num_mul_lag += chunk_len;
+            // value(X) = value(X) * y + (1 - (l_last(X) + l_blind(X))) * (
             //   z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
             // - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma)
-            // ) into h.
-            tmp_num_mul += 2;
-            num_mul + tmp_num_mul * num_perm_slices
+            // ).
+            tmp_num_add_lag += 2;
+            tmp_num_mul_lag += 2;
+            (
+                num_add_lag + tmp_num_add_lag * num_perm_slices,
+                num_mul_lag + tmp_num_mul_lag * num_perm_slices,
+            )
         };
         // beta_term *= &extended_omega;
         if num_perm_slices > 0usize {
-            num_mul += 1;
+            num_mul_lag += 1;
         }
 
         // Lookups
-        // l_0(X) * (1 - z(X)) = 0, 2 add, 2 mul
-        // l_last(X) * (z(X)^2 - z(X)) = 0, 2 add, 3 mul
-        // (1 - (l_last(X) + l_blind(X))) * (
-        //   z(\omega X) (a'(X) + \beta) (s'(X) + \gamma)
-        //   - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta)
-        //          (\theta^{m-1} s_0(X) + ... + s_{m-1}(X) + \gamma)
-        // ) = 0,  4 add, 5 mul
-        // l_0(X) * (a'(X) - s'(X)) = 0, 1 add, 2 mul
-        // (1 - (l_last + l_blind)) * (a′(X) − s′(X))⋅(a′(X) − a′(\omega^{-1} X)) = 0, 2 add, 3 mul
         let num_lookups = pk.get_vk().get_cs().lookups.len();
-        // l_0(X) * (1 - z(X)) = 0
-        num_mul += 2 * num_lookups;
-        // l_last(X) * (z(X)^2 - z(X)) = 0
-        num_mul += 3 * num_lookups;
-        // (1 - (l_last(X) + l_blind(X))) * (
+        // a_minus_s
+        num_add_lag += num_lookups;
+        // value(X) = value(X) * y + l_0(X) * (1 - z(X))
+        num_add_lag += 2 * num_lookups;
+        num_mul_lag += 2 * num_lookups;
+        // value(X) = value(X) * y + l_last(X) * (z(X)^2 - z(X))
+        num_add_lag += 2 * num_lookups;
+        num_mul_lag += 3 * num_lookups;
+        // value(X) = value(X) * y + (1 - (l_last(X) + l_blind(X))) * (
         //   z(\omega X) (a'(X) + \beta) (s'(X) + \gamma)
         //   - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta)
         //          (\theta^{m-1} s_0(X) + ... + s_{m-1}(X) + \gamma)
-        // ) = 0
-        num_mul += 5 * num_lookups;
-        // l_0(X) * (a'(X) - s'(X)) = 0
-        num_mul += 2 * num_lookups;
+        // )
+        num_add_lag += 4 * num_lookups;
+        num_mul_lag += 5 * num_lookups;
+        // value(X) = value(X) * y + l_0(X) * (a'(X) - s'(X))
+        num_add_lag += 1 * num_lookups;
+        num_mul_lag += 2 * num_lookups;
         // (1 - (l_last + l_blind)) * (a′(X) − s′(X))⋅(a′(X) − a′(\omega^{-1} X)) = 0
-        num_mul += 3 * num_lookups;
+        num_add_lag += 2 * num_lookups;
+        num_mul_lag += 3 * num_lookups;
 
-        num_mul *= l;
+        num_add_lag *= l;
+        num_mul_lag *= l;
 
-        num_mul
+        (num_add_lag, num_mul_lag)
     }
 }
 
@@ -154,13 +170,12 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     k: usize,
 ) -> EstimateResult {
     // Generate small vk & pk
-    let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(k as u32);
+    let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(15_u32);
     let vk = keygen_vk(&params, &circuit).expect("keygen_vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("keygen_pk should not fail");
 
     let l = 1;
 
-    // NOTE(sphere): init params
     // Initialize the polynomial commitment parameters
     let cs = pk.get_vk().get_cs();
 
@@ -182,15 +197,15 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     let params = generate_fake_params(k);
 
     // Initialize the domain
-    let domain = EvaluationDomain::fake_new(cs.degree() as u32, params.k, E::Scalar::random(OsRng));
+    let domain = EvaluationDomain::new(cs.degree() as u32, params.k);
 
     let n = 1 << k as usize;
     let rand_ele = E::Scalar::random(&mut OsRng);
     let rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele).collect();
     let rand_vec2 = rand_vec.clone();
-    let rand_values = domain.lagrange_from_vec(rand_vec);
+    let rand_values = domain.lagrange_from_vec(rand_vec.clone());
 
-    // NOTE(sphere): estimate op time
+    // Estimate the time of each operation.
     //      msm
     let (time_msm, _) = measure_elapsed_time(|| params.commit_lagrange(&rand_values));
     //      fft
@@ -216,20 +231,60 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
 
     let num_threads = multicore::current_num_threads();
 
-    // NOTE(sphere): estimate op count
+    // Estimate the number of each operation.
     let FuncCount {
         num_fft,
         num_extended_fft,
         num_msm,
         num_btree,
+        num_add,
         num_mul,
+        num_kate_div,
+        num_add_lag,
+        num_mul_lag,
         mem_usage,
     } = dummy_proof(&params, &pk, &domain, l);
 
-    let estimate_add_mul_field_op_time = || {
+    let estimate_add_mul_lag_field_op_time = || {
         let m = (domain.extended_len() + num_threads - 1) / num_threads;
-        let a = rand_ele;
+        let mut a = rand_ele;
         let mut b = rand_ele;
+        //      m add field ops
+        let (time_add_lag, _) = measure_elapsed_time(|| {
+            for _ in 0..m {
+                a = a + b;
+            }
+            a
+        });
+        //      m mul field ops
+        let (time_mul_lag, _) = measure_elapsed_time(|| {
+            for _ in 0..m {
+                b = a * b;
+            }
+            b
+        });
+        println!(
+            "num_add_lag = {}, time_add_lag = {}",
+            num_add_lag, time_add_lag
+        );
+        println!(
+            "num_mul_lag = {}, time_mul_lag = {}",
+            num_mul_lag, time_mul_lag
+        );
+        (num_add_lag as f64) * time_add_lag + (num_mul_lag as f64) * time_mul_lag
+    };
+
+    let estimate_add_mul_field_op_time = || {
+        let m = ((1 << k) + num_threads - 1) / num_threads;
+        let mut a = rand_ele;
+        let mut b = rand_ele;
+        //      m add field ops
+        let (time_add, _) = measure_elapsed_time(|| {
+            for _ in 0..m {
+                a = a + b;
+            }
+            a
+        });
         //      m mul field ops
         let (time_mul, _) = measure_elapsed_time(|| {
             for _ in 0..m {
@@ -237,8 +292,9 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
             }
             b
         });
+        println!("num_add = {}, time_add = {}", num_add, time_add);
         println!("num_mul = {}, time_mul = {}", num_mul, time_mul);
-        (num_mul as f64) * time_mul
+        (num_add as f64) * time_add + (num_mul as f64) * time_mul
     };
 
     println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
@@ -253,9 +309,17 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         + (num_extended_fft as f64) * time_extended_fft
         + (num_msm as f64) * time_msm
         + (num_btree as f64) * time_btree;
-    println!("pt_non_linear = {}", pt_non_linear);
+    println!("pt_non_linear = {}\n", pt_non_linear);
 
-    let pt_linear = estimate_add_mul_field_op_time();
+    let (time_kate_div, _) = measure_elapsed_time(|| kate_division(&rand_vec, rand_ele));
+    println!(
+        "num_kate_div = {}, time_kate_div = {}",
+        num_kate_div, time_kate_div
+    );
+
+    let pt_linear = estimate_add_mul_lag_field_op_time()
+        + estimate_add_mul_field_op_time()
+        + (num_kate_div as f64) * time_kate_div;
     println!("pt_linear = {}", pt_linear);
 
     let (pt_random, _) = measure_elapsed_time(|| {
@@ -281,7 +345,7 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     // );
     // println!("mem_usage by linear regression = {}", mem_usage2);
 
-    // NOTE(sphere): calculate aggregate_circuit_size
+    // TODO: calculate aggregate_circuit_size.
 
     EstimateResult {
         prover_time,
@@ -289,7 +353,7 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     }
 }
 
-/// simulate_circuit is to run a circuit proving process.
+/// Run a circuit proving process.
 pub fn simulate_circuit<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     circuit: ConcreteCircuit,
     k: usize,
@@ -311,7 +375,6 @@ pub fn simulate_circuit<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
             .expect("proof generation should not fail")
     });
 
-    // NOTE(liutainyi): output prover_time
     println!("k = {}, prover_time = {}", k, prover_time);
 }
 
@@ -320,7 +383,11 @@ struct FuncCount {
     num_extended_fft: usize,
     num_msm: usize,
     num_btree: usize,
+    num_add: usize,
     num_mul: usize,
+    num_kate_div: usize,
+    num_add_lag: usize,
+    num_mul_lag: usize,
     mem_usage: usize,
 }
 
@@ -334,72 +401,172 @@ fn dummy_proof<C: CurveAffine>(
     let mut num_extended_fft = 0_usize;
     let mut num_msm = 0_usize;
     let mut num_btree = 0_usize;
+    let mut num_add = 0_usize;
+    let mut num_mul = 0_usize;
+    let mut num_kate_div = 0_usize;
 
     let cs = pk.get_vk().get_cs();
 
     // (instance, advice) calculate (poly, coset, commitment)
 
-    // NOTE(sphere): ins_commit, pt += l * n_ins * commit_lagrange_t
+    // ins_commit, pt += l * n_ins * commit_lagrange_t
     num_msm += l * cs.num_instance_columns;
-    // NOTE(sphere): ins_poly, pt += l * n_ins + lagrange_to_coeff_t
+    // ins_poly, pt += l * n_ins * lagrange_to_coeff_t
     num_fft += l * cs.num_instance_columns;
-    // NOTE(sphere): ins_coset, pt += l * n_ins + coeff_to_extended_t
+    // ins_coset, pt += l * n_ins * coeff_to_extended_t
     num_extended_fft += l * cs.num_instance_columns;
-    // NOTE(sphere): adv_commit, pt += l * n_adv * commit_lagrange_t
+    // adv_commit, pt += l * n_adv * commit_lagrange_t
     num_msm += l * cs.num_advice_columns;
-    // NOTE(sphere): adv_poly, pt += l * n_adv * lagrange_to_coeff_t
+    // adv_poly, pt += l * n_adv * lagrange_to_coeff_t
     num_fft += l * cs.num_advice_columns;
-    // NOTE(sphere): adv_coset, pt += l * n_adv * coeff_to_extended_t
+    // adv_coset, pt += l * n_adv * coeff_to_extended_t
     num_extended_fft += l * cs.num_advice_columns;
 
-    // NOTE(sphere): pt += l * n_lookup * commit_permuted
-    //      NOTE(sphere): BTree cost for A' and S'.
+    // pt += l * n_lookup * commit_permuted
+    //      BTree cost for A' and S'.
     let num_lookups = cs.lookups.len();
     num_btree += l * num_lookups;
 
     // Commit to permutations.
-    // NOTE(sphere): l * perm_commit_t
+    // l * perm_commit_t
     //      commit_lagrange: z
     let num_perm_slices =
         (cs.permutation.get_columns().len() + (cs.degree() - 3)) / (cs.degree() - 2);
-    num_msm += num_perm_slices;
+    num_msm += l * num_perm_slices;
     //      lagrange_to_coeff: z
-    num_fft += num_perm_slices;
+    num_fft += l * num_perm_slices;
     //      coeff_to_extended: z
-    num_extended_fft += num_perm_slices;
+    num_extended_fft += l * num_perm_slices;
 
-    // NOTE(sphere): pt += lookup_commit_product
+    // pt += lookup_commit_product
     //      commit_lagrange: z, a', s'
-    num_msm += 3 * num_lookups;
+    num_msm += l * 3 * num_lookups;
     //      lagrange_to_coeff: z, a', s'
-    num_fft += 3 * num_lookups;
+    num_fft += l * 3 * num_lookups;
 
     // Commit to the vanishing argument's random polynomial for blinding h(x_3)
-    // NOTE(sphere): vanishing_commit
+    // vanishing_commit
     //      commit: random_poly
     num_msm += 1;
 
     // Evaluate the h(X) polynomial
-    // NOTE(sphere): evaluate_h 3 coeff_to_extended for each lookup argument
+    // evaluate_h 3 coeff_to_extended for each lookup argument
     num_extended_fft += l * 3 * num_lookups;
 
     // Construct the vanishing argument's h(X) commitments
-    // NOTE(sphere): pt += vanishing_construct
+    // pt += vanishing_construct
     //      extended_to_coeff: h_poly
     num_extended_fft += 1;
     //      commit: h_poly_i
     let num_h_pieces = ((domain.extended_len() as u64 + params.n - 1) / params.n) as usize;
     num_msm += num_h_pieces;
 
-    // NOTE(sphere): evaluate h.
-    let num_mul = pk.get_ev().fake_evaluate_h(pk, l);
+    // Evaluate h.
+    let (num_add_lag, num_mul_lag) = pk.get_ev().fake_evaluate_h(pk, l);
 
-    // TODO(sphere): multiopen(shplonk). There should be a more detailed evaluation.
+    // Estimate multiopen(gwc).
     //      commit: h_x, h_x
     //      The evaluations in multiopen is too small.
-    num_msm += 2;
+    // Initialize the query sets.
+    let cs = pk.get_vk().get_cs();
+    let queries = (0..l)
+        .flat_map(|_| {
+            iter::empty()
+                .chain(
+                    cs.instance_queries
+                        .iter()
+                        .map(move |&(column, at)| FakeProverQuery { rotation: at }),
+                )
+                .chain(
+                    cs.advice_queries
+                        .iter()
+                        .map(move |&(column, at)| FakeProverQuery { rotation: at }),
+                )
+                .chain((0..num_perm_slices).flat_map(|_| {
+                    iter::empty()
+                        // Open permutation product commitments at x and \omega x
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::cur(),
+                        }))
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::next(),
+                        }))
+                }))
+                // Open it at \omega^{last} x for all but the last set. This rotation is only
+                // sensical for the first row, but we only use this rotation in a constraint
+                // that is gated on l_0.
+                .chain((0..num_perm_slices).rev().skip(1).flat_map(|_| {
+                    Some(FakeProverQuery {
+                        rotation: Rotation(-1),
+                    })
+                }))
+                .chain((0..num_lookups).flat_map(|_| {
+                    iter::empty()
+                        // Open lookup product commitments at x
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::cur(),
+                        }))
+                        // Open lookup input commitments at x
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::cur(),
+                        }))
+                        // Open lookup table commitments at x
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::cur(),
+                        }))
+                        // Open lookup input commitments at x_inv
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::prev(),
+                        }))
+                        // Open lookup product commitments at x_next
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::next(),
+                        }))
+                }))
+        })
+        .chain(
+            cs.fixed_queries
+                .iter()
+                .map(|&(_, at)| FakeProverQuery { rotation: at }),
+        )
+        .chain(
+            (0..cs.permutation.get_columns().len()).map(|_| FakeProverQuery {
+                rotation: Rotation::cur(),
+            }),
+        )
+        // We query the h(X) polynomial at x
+        .chain(
+            iter::empty()
+                .chain(Some(FakeProverQuery {
+                    rotation: Rotation::cur(),
+                }))
+                .chain(Some(FakeProverQuery {
+                    rotation: Rotation::cur(),
+                })),
+        );
+    let mut point_query_map: BTreeMap<Rotation, usize> = BTreeMap::new();
+    for query in queries {
+        if let Some(queries) = point_query_map.get_mut(&query.rotation) {
+            *queries = *queries + 1_usize;
+        } else {
+            point_query_map.insert(query.rotation, 1);
+        }
+    }
+
+    for rot in point_query_map.keys() {
+        let cnt = point_query_map.get(rot).unwrap();
+        // poly_batch = poly_batch * *v + poly;
+        // eval_batch = eval_batch * *v + eval;
+        num_add += cnt;
+        num_mul += cnt;
+
+        // poly_batch = &poly_batch - eval_batch;
+        num_add += 1;
+        num_kate_div += 1;
+        num_msm += 1;
+    }
 
-    // NOTE(sphere): Memory
+    // Memory
     let mut mem_usage = 0_usize;
     //      instance / advice / fixed as value poly, and coset:
     let n = 1 << params.k as usize;
@@ -438,26 +605,28 @@ fn dummy_proof<C: CurveAffine>(
         num_extended_fft,
         num_msm,
         num_btree,
+        num_add,
         num_mul,
+        num_kate_div,
+        num_add_lag,
+        num_mul_lag,
         mem_usage,
     }
 }
 
-/// cost_model_main is to generate a main function to run the cost model for a circuit.
+/// Generate a main function to run the cost model for a circuit.
 #[macro_export]
 macro_rules! cost_model_main {
     ($cir:expr) => {
         use halo2_proofs::dev::{estimate, simulate_circuit};
 
         fn main() {
-            // NOTE(sphere): get k from args
             let mode = std::env::args().nth(1).expect("no running-mode given");
             let k = std::env::args()
                 .nth(2)
                 .expect("no circuit size given")
                 .parse()
                 .unwrap();
-            // NOTE(sphere): estimate linear cost (cfg == simulate)
             let circuit = $cir;
             if mode.eq(&String::from("simulate")) {
                 simulate_circuit::<Bn256, _>(circuit, k);
diff --git a/halo2_proofs/src/poly/domain.rs b/halo2_proofs/src/poly/domain.rs
index 8d1b0a72e3..5204ebef26 100644
--- a/halo2_proofs/src/poly/domain.rs
+++ b/halo2_proofs/src/poly/domain.rs
@@ -141,33 +141,6 @@ impl<G: Group> EvaluationDomain<G> {
         }
     }
 
-    /// Generate a fake domain.
-    pub fn fake_new(j: u32, k: u32, rand_scalar: G::Scalar) -> Self {
-        let n = 1 << k;
-        let quotient_poly_degree = (j - 1) as i32;
-        let mut extended_k = k;
-        while (1 << extended_k) < (n * quotient_poly_degree) {
-            extended_k += 1;
-        }
-
-        EvaluationDomain {
-            n: n as u64,
-            k: k as u32,
-            extended_k: extended_k as u32,
-            omega: rand_scalar,
-            omega_inv: rand_scalar,
-            extended_omega: rand_scalar,
-            extended_omega_inv: rand_scalar,
-            g_coset: rand_scalar,
-            g_coset_inv: rand_scalar,
-            quotient_poly_degree: quotient_poly_degree as u64,
-            ifft_divisor: rand_scalar,
-            extended_ifft_divisor: rand_scalar,
-            t_evaluations: (1..(1 << (extended_k - k))).map(|_| rand_scalar).collect(),
-            barycentric_weight: rand_scalar,
-        }
-    }
-
     /// Obtains a polynomial in Lagrange form when given a vector of Lagrange
     /// coefficients of size `n`; panics if the provided vector is the wrong
     /// length.

From 2eef6c5ba2d13483f8bc42aa54fae4bbe0132ce1 Mon Sep 17 00:00:00 2001
From: spherel <sph6r6.l1u@gmail.com>
Date: Sun, 11 Sep 2022 18:46:18 +0000
Subject: [PATCH 6/9] Reduce the running time and estimate the multiopen cost

---
 halo2_proofs/src/dev/cost_model.rs | 394 ++++++++++++++++++++---------
 halo2_proofs/src/poly/domain.rs    |  27 --
 2 files changed, 280 insertions(+), 141 deletions(-)

diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs
index 3c5b7ff05b..fba974230d 100644
--- a/halo2_proofs/src/dev/cost_model.rs
+++ b/halo2_proofs/src/dev/cost_model.rs
@@ -1,8 +1,8 @@
 //! Circuit cost model.
-use std::{collections::BTreeMap, fs, io, mem, time::Instant};
+use std::{collections::BTreeMap, fs, io, iter, mem, time::Instant};
 
 use crate::{
-    arithmetic::{eval_polynomial, CurveAffine, Engine, Field},
+    arithmetic::{eval_polynomial, kate_division, CurveAffine, Engine, Field},
     circuit::{Cell, Layouter, SimpleFloorPlanner},
     multicore,
     plonk::*,
@@ -16,7 +16,7 @@ use rayon::current_num_threads;
 
 use super::CircuitCost;
 
-/// measure the elapsed time.
+/// Measures the elapsed time of a closure.
 pub fn measure_elapsed_time<T, F: FnOnce() -> T>(f: F) -> (f64, T) {
     let start = Instant::now();
     let res = f();
@@ -35,116 +35,129 @@ impl EstimateResult {
     pub fn print(&self) {
         println!("prover time = {} (s)", self.prover_time);
         println!("memory usage = {} (KB)", self.mem_usage);
-        // println!("aggregate circuit size = {}", aggregate_circuit_size);
     }
 }
 
 impl Calculation {
-    fn fake_evaluate<F: Field>(&self) -> usize {
+    // Returns the operation counts as `(additions, multiplications)`.
+    fn fake_evaluate<F: Field>(&self) -> (usize, usize) {
         match self {
-            Calculation::Add(_, _) => 0,
-            Calculation::Sub(_, _) => 0,
-            Calculation::Mul(_, _) => 1,
-            Calculation::Negate(_) => 0,
-            Calculation::LcBeta(_, _) => 1,
-            Calculation::LcTheta(_, _) => 1,
-            Calculation::AddGamma(_) => 0,
-            Calculation::Store(_) => 0,
+            Calculation::Add(_, _) => (1, 0),
+            Calculation::Sub(_, _) => (1, 0),
+            Calculation::Mul(_, _) => (0, 1),
+            Calculation::Negate(_) => (1, 0),
+            Calculation::LcBeta(_, _) => (1, 1),
+            Calculation::LcTheta(_, _) => (1, 1),
+            Calculation::AddGamma(_) => (1, 0),
+            Calculation::Store(_) => (0, 0),
         }
     }
 }
 
+struct FakeProverQuery {
+    rotation: Rotation,
+}
+
 impl<C: CurveAffine> Evaluator<C> {
-    fn fake_evaluate_h(&self, pk: &ProvingKey<C>, l: usize) -> usize {
+    // Returns the number of Hadamard (element-wise) additions and products as `(additions, multiplications)`.
+    fn fake_evaluate_h(&self, pk: &ProvingKey<C>, l: usize) -> (usize, usize) {
         let cs = pk.get_vk().get_cs();
-        let mut num_mul = 0;
+        let mut num_mul_lag = 0;
+        let mut num_add_lag = 0;
         // All calculations, with cached intermediate results
         for calc in self.calculations.iter() {
-            let tmp_num_mul = calc.calculation.fake_evaluate::<C::Scalar>();
-            num_mul += tmp_num_mul;
+            let (tmp_num_add_lag, tmp_num_mul_lag) = calc.calculation.fake_evaluate::<C::Scalar>();
+            num_add_lag += tmp_num_add_lag;
+            num_mul_lag += tmp_num_mul_lag;
         }
 
         // Accumulate value parts
-        num_mul += self.value_parts.len();
+        num_add_lag += self.value_parts.len();
+        num_mul_lag += self.value_parts.len();
 
         for table_result in self.lookup_results.iter() {
-            let tmp_num_mul = table_result.fake_evaluate::<C::Scalar>();
-            num_mul += tmp_num_mul;
+            let (tmp_num_add_lag, tmp_num_mul_lag) = table_result.fake_evaluate::<C::Scalar>();
+            num_add_lag += tmp_num_add_lag;
+            num_mul_lag += tmp_num_mul_lag;
         }
 
         // Permutations
         let chunk_len = cs.degree() - 2;
         let num_perm_slices = (cs.permutation.get_columns().len() + chunk_len - 1) / chunk_len;
 
-        // Enforce only for the first set.
-        // l_0(X) * (1 - z_0(X)) = 0
-        num_mul += 2;
-
-        // Enforce only for the last set.
-        // l_last(X) * (z_l(X)^2 - z_l(X)) = 0
-        num_mul += 3;
-
-        // Except for the first set, enforce.
-        // l_0(X) * (z_i(X) - z_{i-1}(\omega^(last) X)) = 0
         if num_perm_slices > 0 {
-            num_mul += 2 * (num_perm_slices - 1);
-        }
-
-        // delta_start * beta_start
-        num_mul += 1;
-        // And for all the sets we enforce:
-        // (1 - (l_last(X) + l_blind(X))) * (
-        //   z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
-        // - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma)
-        // )
-        num_mul = {
+            // Enforce only for the first set.
+            // value(X) = value(X) * y + l_0(X) * (1 - z_0(X))
+            num_add_lag += 2;
+            num_mul_lag += 2;
+            // Enforce only for the last set.
+            // value(X) = value(X) * y + l_last(X) * (z_l(X)^2 - z_l(X))
+            num_add_lag += 2;
+            num_mul_lag += 3;
+            // Except for the first set, enforce.
+            // value(X) = value(X) * y + l_0(X) * (z_i(X) - z_{i-1}(\omega^(last) X))
+            num_add_lag += 2 * (num_perm_slices - 1);
+            num_mul_lag += 2 * (num_perm_slices - 1);
+            // delta_start * beta_start
+            num_mul_lag += 1;
+
+            // And for all the sets we enforce:
+            // (1 - (l_last(X) + l_blind(X))) * (
+            //   z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
+            // - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma)
+            // )
+            
             // Calculate left = z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
-            let mut tmp_num_mul = 0;
-            tmp_num_mul += 2 * chunk_len;
+            let mut tmp_num_add_lag = 0;
+            let mut tmp_num_mul_lag = 0;
+            tmp_num_add_lag += 2 * chunk_len;
+            tmp_num_mul_lag += 2 * chunk_len;
             // Calculate right = z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma), current_delta *= DELTA
-            tmp_num_mul += chunk_len;
-            tmp_num_mul += chunk_len;
-            // Merge (1 - (l_last(X) + l_blind(X))) * (
+            tmp_num_add_lag += 2 * chunk_len;
+            tmp_num_mul_lag += chunk_len;
+            tmp_num_mul_lag += chunk_len;
+            // value(X) = value(X) * y + (1 - (l_last(X) + l_blind(X))) * (
             //   z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma)
             // - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma)
-            // ) into h.
-            tmp_num_mul += 2;
-            num_mul + tmp_num_mul * num_perm_slices
-        };
-        // beta_term *= &extended_omega;
-        if num_perm_slices > 0usize {
-            num_mul += 1;
+            // ).
+            tmp_num_add_lag += 2;
+            tmp_num_mul_lag += 2;
+            
+            num_add_lag += tmp_num_add_lag * num_perm_slices;
+            num_mul_lag += tmp_num_mul_lag * num_perm_slices;
+
+            // beta_term *= &extended_omega;
+            num_mul_lag += 1;
         }
 
         // Lookups
-        // l_0(X) * (1 - z(X)) = 0, 2 add, 2 mul
-        // l_last(X) * (z(X)^2 - z(X)) = 0, 2 add, 3 mul
-        // (1 - (l_last(X) + l_blind(X))) * (
-        //   z(\omega X) (a'(X) + \beta) (s'(X) + \gamma)
-        //   - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta)
-        //          (\theta^{m-1} s_0(X) + ... + s_{m-1}(X) + \gamma)
-        // ) = 0,  4 add, 5 mul
-        // l_0(X) * (a'(X) - s'(X)) = 0, 1 add, 2 mul
-        // (1 - (l_last + l_blind)) * (a′(X) − s′(X))⋅(a′(X) − a′(\omega^{-1} X)) = 0, 2 add, 3 mul
         let num_lookups = pk.get_vk().get_cs().lookups.len();
-        // l_0(X) * (1 - z(X)) = 0
-        num_mul += 2 * num_lookups;
-        // l_last(X) * (z(X)^2 - z(X)) = 0
-        num_mul += 3 * num_lookups;
-        // (1 - (l_last(X) + l_blind(X))) * (
+        // a_minus_s
+        num_add_lag += num_lookups;
+        // value(X) = value(X) * y + l_0(X) * (1 - z(X))
+        num_add_lag += 2 * num_lookups;
+        num_mul_lag += 2 * num_lookups;
+        // value(X) = value(X) * y + l_last(X) * (z(X)^2 - z(X))
+        num_add_lag += 2 * num_lookups;
+        num_mul_lag += 3 * num_lookups;
+        // value(X) = value(X) * y + (1 - (l_last(X) + l_blind(X))) * (
         //   z(\omega X) (a'(X) + \beta) (s'(X) + \gamma)
         //   - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta)
         //          (\theta^{m-1} s_0(X) + ... + s_{m-1}(X) + \gamma)
-        // ) = 0
-        num_mul += 5 * num_lookups;
-        // l_0(X) * (a'(X) - s'(X)) = 0
-        num_mul += 2 * num_lookups;
+        // )
+        num_add_lag += 4 * num_lookups;
+        num_mul_lag += 5 * num_lookups;
+        // value(X) = value(X) * y + l_0(X) * (a'(X) - s'(X))
+        num_add_lag += 1 * num_lookups;
+        num_mul_lag += 2 * num_lookups;
         // (1 - (l_last + l_blind)) * (a′(X) − s′(X))⋅(a′(X) − a′(\omega^{-1} X)) = 0
-        num_mul += 3 * num_lookups;
+        num_add_lag += 2 * num_lookups;
+        num_mul_lag += 3 * num_lookups;
 
-        num_mul *= l;
+        num_add_lag *= l;
+        num_mul_lag *= l;
 
-        num_mul
+        (num_add_lag, num_mul_lag)
     }
 }
 
@@ -154,13 +167,12 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     k: usize,
 ) -> EstimateResult {
     // Generate small vk & pk
-    let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(k as u32);
+    let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(15_u32);
     let vk = keygen_vk(&params, &circuit).expect("keygen_vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("keygen_pk should not fail");
 
     let l = 1;
 
-    // NOTE(sphere): init params
     // Initialize the polynomial commitment parameters
     let cs = pk.get_vk().get_cs();
 
@@ -182,15 +194,15 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     let params = generate_fake_params(k);
 
     // Initialize the domain
-    let domain = EvaluationDomain::fake_new(cs.degree() as u32, params.k, E::Scalar::random(OsRng));
+    let domain = EvaluationDomain::new(cs.degree() as u32, params.k);
 
     let n = 1 << k as usize;
     let rand_ele = E::Scalar::random(&mut OsRng);
     let rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele).collect();
     let rand_vec2 = rand_vec.clone();
-    let rand_values = domain.lagrange_from_vec(rand_vec);
+    let rand_values = domain.lagrange_from_vec(rand_vec.clone());
 
-    // NOTE(sphere): estimate op time
+    // Estimate the time of each operation.
     //      msm
     let (time_msm, _) = measure_elapsed_time(|| params.commit_lagrange(&rand_values));
     //      fft
@@ -216,20 +228,60 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
 
     let num_threads = multicore::current_num_threads();
 
-    // NOTE(sphere): estimate op count
+    // Estimate the number of each operation.
     let FuncCount {
         num_fft,
         num_extended_fft,
         num_msm,
         num_btree,
+        num_add,
         num_mul,
+        num_kate_div,
+        num_add_lag,
+        num_mul_lag,
         mem_usage,
     } = dummy_proof(&params, &pk, &domain, l);
 
-    let estimate_add_mul_field_op_time = || {
+    let estimate_add_mul_lag_field_op_time = || {
         let m = (domain.extended_len() + num_threads - 1) / num_threads;
-        let a = rand_ele;
+        let mut a = rand_ele;
+        let mut b = rand_ele;
+        //      m add field ops
+        let (time_add_lag, _) = measure_elapsed_time(|| {
+            for _ in 0..m {
+                a = a + b;
+            }
+            a
+        });
+        //      m mul field ops
+        let (time_mul_lag, _) = measure_elapsed_time(|| {
+            for _ in 0..m {
+                b = a * b;
+            }
+            b
+        });
+        println!(
+            "num_add_lag = {}, time_add_lag = {}",
+            num_add_lag, time_add_lag
+        );
+        println!(
+            "num_mul_lag = {}, time_mul_lag = {}",
+            num_mul_lag, time_mul_lag
+        );
+        (num_add_lag as f64) * time_add_lag + (num_mul_lag as f64) * time_mul_lag
+    };
+
+    let estimate_add_mul_field_op_time = || {
+        let m = ((1 << k) + num_threads - 1) / num_threads;
+        let mut a = rand_ele;
         let mut b = rand_ele;
+        //      m add field ops
+        let (time_add, _) = measure_elapsed_time(|| {
+            for _ in 0..m {
+                a = a + b;
+            }
+            a
+        });
         //      m mul field ops
         let (time_mul, _) = measure_elapsed_time(|| {
             for _ in 0..m {
@@ -237,8 +289,9 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
             }
             b
         });
+        println!("num_add = {}, time_add = {}", num_add, time_add);
         println!("num_mul = {}, time_mul = {}", num_mul, time_mul);
-        (num_mul as f64) * time_mul
+        (num_add as f64) * time_add + (num_mul as f64) * time_mul
     };
 
     println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
@@ -253,9 +306,17 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         + (num_extended_fft as f64) * time_extended_fft
         + (num_msm as f64) * time_msm
         + (num_btree as f64) * time_btree;
-    println!("pt_non_linear = {}", pt_non_linear);
+    println!("pt_non_linear = {}\n", pt_non_linear);
 
-    let pt_linear = estimate_add_mul_field_op_time();
+    let (time_kate_div, _) = measure_elapsed_time(|| kate_division(&rand_vec, rand_ele));
+    println!(
+        "num_kate_div = {}, time_kate_div = {}",
+        num_kate_div, time_kate_div
+    );
+
+    let pt_linear = estimate_add_mul_lag_field_op_time()
+        + estimate_add_mul_field_op_time()
+        + (num_kate_div as f64) * time_kate_div;
     println!("pt_linear = {}", pt_linear);
 
     let (pt_random, _) = measure_elapsed_time(|| {
@@ -281,7 +342,7 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     // );
     // println!("mem_usage by linear regression = {}", mem_usage2);
 
-    // NOTE(sphere): calculate aggregate_circuit_size
+    // calculate aggregate_circuit_size
 
     EstimateResult {
         prover_time,
@@ -289,7 +350,7 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     }
 }
 
-/// simulate_circuit is to run a circuit proving process.
+/// Run a circuit proving process.
 pub fn simulate_circuit<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     circuit: ConcreteCircuit,
     k: usize,
@@ -311,7 +372,6 @@ pub fn simulate_circuit<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
             .expect("proof generation should not fail")
     });
 
-    // NOTE(liutainyi): output prover_time
     println!("k = {}, prover_time = {}", k, prover_time);
 }
 
@@ -320,7 +380,11 @@ struct FuncCount {
     num_extended_fft: usize,
     num_msm: usize,
     num_btree: usize,
+    num_add: usize,
     num_mul: usize,
+    num_kate_div: usize,
+    num_add_lag: usize,
+    num_mul_lag: usize,
     mem_usage: usize,
 }
 
@@ -334,72 +398,172 @@ fn dummy_proof<C: CurveAffine>(
     let mut num_extended_fft = 0_usize;
     let mut num_msm = 0_usize;
     let mut num_btree = 0_usize;
+    let mut num_add = 0_usize;
+    let mut num_mul = 0_usize;
+    let mut num_kate_div = 0_usize;
 
     let cs = pk.get_vk().get_cs();
 
     // (instance, advice) calculate (poly, coset, commitment)
 
-    // NOTE(sphere): ins_commit, pt += l * n_ins * commit_lagrange_t
+    // ins_commit, pt += l * n_ins * commit_lagrange_t
     num_msm += l * cs.num_instance_columns;
-    // NOTE(sphere): ins_poly, pt += l * n_ins + lagrange_to_coeff_t
+    // ins_poly, pt += l * n_ins * lagrange_to_coeff_t
     num_fft += l * cs.num_instance_columns;
-    // NOTE(sphere): ins_coset, pt += l * n_ins + coeff_to_extended_t
+    // ins_coset, pt += l * n_ins * coeff_to_extended_t
     num_extended_fft += l * cs.num_instance_columns;
-    // NOTE(sphere): adv_commit, pt += l * n_adv * commit_lagrange_t
+    // adv_commit, pt += l * n_adv * commit_lagrange_t
     num_msm += l * cs.num_advice_columns;
-    // NOTE(sphere): adv_poly, pt += l * n_adv * lagrange_to_coeff_t
+    // adv_poly, pt += l * n_adv * lagrange_to_coeff_t
     num_fft += l * cs.num_advice_columns;
-    // NOTE(sphere): adv_coset, pt += l * n_adv * coeff_to_extended_t
+    // adv_coset, pt += l * n_adv * coeff_to_extended_t
     num_extended_fft += l * cs.num_advice_columns;
 
-    // NOTE(sphere): pt += l * n_lookup * commit_permuted
-    //      NOTE(sphere): BTree cost for A' and S'.
+    // pt += l * n_lookup * commit_permuted
+    //      BTree cost for A' and S'.
     let num_lookups = cs.lookups.len();
     num_btree += l * num_lookups;
 
     // Commit to permutations.
-    // NOTE(sphere): l * perm_commit_t
+    // l * perm_commit_t
     //      commit_lagrange: z
     let num_perm_slices =
         (cs.permutation.get_columns().len() + (cs.degree() - 3)) / (cs.degree() - 2);
-    num_msm += num_perm_slices;
+    num_msm += l * num_perm_slices;
     //      lagrange_to_coeff: z
-    num_fft += num_perm_slices;
+    num_fft += l * num_perm_slices;
     //      coeff_to_extended: z
-    num_extended_fft += num_perm_slices;
+    num_extended_fft += l * num_perm_slices;
 
-    // NOTE(sphere): pt += lookup_commit_product
+    // pt += lookup_commit_product
     //      commit_lagrange: z, a', s'
-    num_msm += 3 * num_lookups;
+    num_msm += l * 3 * num_lookups;
     //      lagrange_to_coeff: z, a', s'
-    num_fft += 3 * num_lookups;
+    num_fft += l * 3 * num_lookups;
 
     // Commit to the vanishing argument's random polynomial for blinding h(x_3)
-    // NOTE(sphere): vanishing_commit
+    // vanishing_commit
     //      commit: random_poly
     num_msm += 1;
 
     // Evaluate the h(X) polynomial
-    // NOTE(sphere): evaluate_h 3 coeff_to_extended for each lookup argument
+    // evaluate_h 3 coeff_to_extended for each lookup argument
     num_extended_fft += l * 3 * num_lookups;
 
     // Construct the vanishing argument's h(X) commitments
-    // NOTE(sphere): pt += vanishing_construct
+    // pt += vanishing_construct
     //      extended_to_coeff: h_poly
     num_extended_fft += 1;
     //      commit: h_poly_i
     let num_h_pieces = ((domain.extended_len() as u64 + params.n - 1) / params.n) as usize;
     num_msm += num_h_pieces;
 
-    // NOTE(sphere): evaluate h.
-    let num_mul = pk.get_ev().fake_evaluate_h(pk, l);
+    // Evaluate h.
+    let (num_add_lag, num_mul_lag) = pk.get_ev().fake_evaluate_h(pk, l);
 
-    // TODO(sphere): multiopen(shplonk). There should be a more detailed evaluation.
+    // Estimate multiopen(gwc).
     //      commit: h_x, h_x
     //      The evaluations in multiopen is too small.
-    num_msm += 2;
+    // Initialize the query sets.
+    let cs = pk.get_vk().get_cs();
+    let queries = (0..l)
+        .flat_map(|_| {
+            iter::empty()
+                .chain(
+                    cs.instance_queries // one opening per instance query; only the rotation matters
+                        .iter()
+                        .map(move |&(_, at)| FakeProverQuery { rotation: at }),
+                )
+                .chain(
+                    cs.advice_queries // one opening per advice query; only the rotation matters
+                        .iter()
+                        .map(move |&(_, at)| FakeProverQuery { rotation: at }),
+                )
+                .chain((0..num_perm_slices).flat_map(|_| {
+                    iter::empty()
+                        // Open permutation product commitments at x and \omega x
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::cur(),
+                        }))
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::next(),
+                        }))
+                }))
+                // Open it at \omega^{last} x for all but the last set. This rotation is only
+                // sensical for the first row, but we only use this rotation in a constraint
+                // that is gated on l_0.
+                .chain((0..num_perm_slices).rev().skip(1).flat_map(|_| {
+                    Some(FakeProverQuery {
+                        rotation: Rotation(-1),
+                    })
+                }))
+                .chain((0..num_lookups).flat_map(|_| {
+                    iter::empty()
+                        // Open lookup product commitments at x
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::cur(),
+                        }))
+                        // Open lookup input commitments at x
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::cur(),
+                        }))
+                        // Open lookup table commitments at x
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::cur(),
+                        }))
+                        // Open lookup input commitments at x_inv
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::prev(),
+                        }))
+                        // Open lookup product commitments at x_next
+                        .chain(Some(FakeProverQuery {
+                            rotation: Rotation::next(),
+                        }))
+                }))
+        })
+        .chain(
+            cs.fixed_queries
+                .iter()
+                .map(|&(_, at)| FakeProverQuery { rotation: at }),
+        )
+        .chain(
+            (0..cs.permutation.get_columns().len()).map(|_| FakeProverQuery {
+                rotation: Rotation::cur(),
+            }),
+        )
+        // We query the h(X) polynomial at x
+        .chain(
+            iter::empty()
+                .chain(Some(FakeProverQuery {
+                    rotation: Rotation::cur(),
+                }))
+                .chain(Some(FakeProverQuery {
+                    rotation: Rotation::cur(),
+                })),
+        );
+    let mut point_query_map: BTreeMap<Rotation, usize> = BTreeMap::new();
+    for query in queries {
+        if let Some(queries) = point_query_map.get_mut(&query.rotation) {
+            *queries = *queries + 1_usize;
+        } else {
+            point_query_map.insert(query.rotation, 1);
+        }
+    }
+
+    for rot in point_query_map.keys() {
+        let cnt = point_query_map.get(rot).unwrap();
+        // poly_batch = poly_batch * *v + poly;
+        // eval_batch = eval_batch * *v + eval;
+        num_add += cnt;
+        num_mul += cnt;
+
+        // poly_batch = &poly_batch - eval_batch;
+        num_add += 1;
+        num_kate_div += 1;
+        num_msm += 1;
+    }
 
-    // NOTE(sphere): Memory
+    // Memory
     let mut mem_usage = 0_usize;
     //      instance / advice / fixed as value poly, and coset:
     let n = 1 << params.k as usize;
@@ -438,26 +602,28 @@ fn dummy_proof<C: CurveAffine>(
         num_extended_fft,
         num_msm,
         num_btree,
+        num_add,
         num_mul,
+        num_kate_div,
+        num_add_lag,
+        num_mul_lag,
         mem_usage,
     }
 }
 
-/// cost_model_main is to generate a main function to run the cost model for a circuit.
+/// Generate a main function to run the cost model for a circuit.
 #[macro_export]
 macro_rules! cost_model_main {
     ($cir:expr) => {
         use halo2_proofs::dev::{estimate, simulate_circuit};
 
         fn main() {
-            // NOTE(sphere): get k from args
             let mode = std::env::args().nth(1).expect("no running-mode given");
             let k = std::env::args()
                 .nth(2)
                 .expect("no circuit size given")
                 .parse()
                 .unwrap();
-            // NOTE(sphere): estimate linear cost (cfg == simulate)
             let circuit = $cir;
             if mode.eq(&String::from("simulate")) {
                 simulate_circuit::<Bn256, _>(circuit, k);
diff --git a/halo2_proofs/src/poly/domain.rs b/halo2_proofs/src/poly/domain.rs
index 8d1b0a72e3..5204ebef26 100644
--- a/halo2_proofs/src/poly/domain.rs
+++ b/halo2_proofs/src/poly/domain.rs
@@ -141,33 +141,6 @@ impl<G: Group> EvaluationDomain<G> {
         }
     }
 
-    /// Generate a fake domain.
-    pub fn fake_new(j: u32, k: u32, rand_scalar: G::Scalar) -> Self {
-        let n = 1 << k;
-        let quotient_poly_degree = (j - 1) as i32;
-        let mut extended_k = k;
-        while (1 << extended_k) < (n * quotient_poly_degree) {
-            extended_k += 1;
-        }
-
-        EvaluationDomain {
-            n: n as u64,
-            k: k as u32,
-            extended_k: extended_k as u32,
-            omega: rand_scalar,
-            omega_inv: rand_scalar,
-            extended_omega: rand_scalar,
-            extended_omega_inv: rand_scalar,
-            g_coset: rand_scalar,
-            g_coset_inv: rand_scalar,
-            quotient_poly_degree: quotient_poly_degree as u64,
-            ifft_divisor: rand_scalar,
-            extended_ifft_divisor: rand_scalar,
-            t_evaluations: (1..(1 << (extended_k - k))).map(|_| rand_scalar).collect(),
-            barycentric_weight: rand_scalar,
-        }
-    }
-
     /// Obtains a polynomial in Lagrange form when given a vector of Lagrange
     /// coefficients of size `n`; panics if the provided vector is the wrong
     /// length.

From 53bc44760c06b9083662146f6d97d91b454363df Mon Sep 17 00:00:00 2001
From: spherel <sph6r6.l1u@gmail.com>
Date: Sun, 11 Sep 2022 20:36:52 +0000
Subject: [PATCH 7/9] Change the method to estimate hadamard operations.

---
 halo2_proofs/src/dev/cost_model.rs | 110 +++++++++++++----------------
 1 file changed, 51 insertions(+), 59 deletions(-)

diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs
index 7f9d66943b..46a6a3b5d8 100644
--- a/halo2_proofs/src/dev/cost_model.rs
+++ b/halo2_proofs/src/dev/cost_model.rs
@@ -2,7 +2,7 @@
 use std::{collections::BTreeMap, fs, io, iter, mem, time::Instant};
 
 use crate::{
-    arithmetic::{eval_polynomial, kate_division, CurveAffine, Engine, Field},
+    arithmetic::{eval_polynomial, kate_division, CurveAffine, Engine, Field, parallelize},
     circuit::{Cell, Layouter, SimpleFloorPlanner},
     multicore,
     plonk::*,
@@ -198,7 +198,7 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
 
     let n = 1 << k as usize;
     let rand_ele = E::Scalar::random(&mut OsRng);
-    let rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele).collect();
+    let rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele.clone()).collect();
     let rand_vec2 = rand_vec.clone();
     let rand_values = domain.lagrange_from_vec(rand_vec.clone());
 
@@ -208,7 +208,7 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     //      fft
     let (time_fft, rand_poly) = measure_elapsed_time(|| domain.lagrange_to_coeff(rand_values));
     //      extended fft
-    let (time_extended_fft, _) = measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
+    let (time_extended_fft, rand_coset) = measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
     //      BTree time cost in lookup argument
     let (time_btree, _) = measure_elapsed_time(|| {
         let mut leftover_table_map: BTreeMap<E::Scalar, u32> =
@@ -226,8 +226,6 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         }
     });
 
-    let num_threads = multicore::current_num_threads();
-
     // Estimate the number of each operation.
     let FuncCount {
         num_fft,
@@ -242,58 +240,6 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         mem_usage,
     } = dummy_proof(&params, &pk, &domain, l);
 
-    let estimate_add_mul_lag_field_op_time = || {
-        let m = (domain.extended_len() + num_threads - 1) / num_threads;
-        let mut a = rand_ele;
-        let mut b = rand_ele;
-        //      m add field ops
-        let (time_add_lag, _) = measure_elapsed_time(|| {
-            for _ in 0..m {
-                a = a + b;
-            }
-            a
-        });
-        //      m mul field ops
-        let (time_mul_lag, _) = measure_elapsed_time(|| {
-            for _ in 0..m {
-                b = a * b;
-            }
-            b
-        });
-        println!(
-            "num_add_lag = {}, time_add_lag = {}",
-            num_add_lag, time_add_lag
-        );
-        println!(
-            "num_mul_lag = {}, time_mul_lag = {}",
-            num_mul_lag, time_mul_lag
-        );
-        (num_add_lag as f64) * time_add_lag + (num_mul_lag as f64) * time_mul_lag
-    };
-
-    let estimate_add_mul_field_op_time = || {
-        let m = ((1 << k) + num_threads - 1) / num_threads;
-        let mut a = rand_ele;
-        let mut b = rand_ele;
-        //      m add field ops
-        let (time_add, _) = measure_elapsed_time(|| {
-            for _ in 0..m {
-                a = a + b;
-            }
-            a
-        });
-        //      m mul field ops
-        let (time_mul, _) = measure_elapsed_time(|| {
-            for _ in 0..m {
-                b = a * b;
-            }
-            b
-        });
-        println!("num_add = {}, time_add = {}", num_add, time_add);
-        println!("num_mul = {}, time_mul = {}", num_mul, time_mul);
-        (num_add as f64) * time_add + (num_mul as f64) * time_mul
-    };
-
     println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
     println!(
         "num_extended_fft = {}, time_extended_fft = {}",
@@ -308,14 +254,60 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         + (num_btree as f64) * time_btree;
     println!("pt_non_linear = {}\n", pt_non_linear);
 
+    let mut rand_ext_vec: Vec<E::Scalar> = (0..domain.extended_len()).map(|_| rand_ele.clone()).collect();
+    let (time_add_lag, _) = measure_elapsed_time(|| {
+        parallelize(&mut rand_ext_vec, |rand_ext_vec, _| {
+            for value in rand_ext_vec.iter_mut() {
+                *value = *value + rand_ele; // store the sum so the timed work is not dead code
+            }
+        })
+    });
+    println!(
+        "num_add_lag = {}, time_add_lag = {}",
+        num_add_lag, time_add_lag
+    );
+
+    let (time_mul_lag, _) = measure_elapsed_time(|| {
+        parallelize(&mut rand_ext_vec, |rand_ext_vec, _| {
+            for value in rand_ext_vec.iter_mut() {
+                *value = *value * rand_ele; // store the product so the timed work is not dead code
+            }
+        })
+    });
+    println!(
+        "num_mul_lag = {}, time_mul_lag = {}",
+        num_mul_lag, time_mul_lag
+    );
+
+    let mut rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele.clone()).collect();
+    let (time_add, _) = measure_elapsed_time(|| {
+        parallelize(&mut rand_vec, |rand_vec, _| {
+            for value in rand_vec.iter_mut() {
+                *value = *value + rand_ele; // store the sum so the timed work is not dead code
+            }
+        })
+    });
+    println!("num_add = {}, time_add = {}", num_add, time_add);
+
+    let (time_mul, _) = measure_elapsed_time(|| {
+        parallelize(&mut rand_vec, |rand_vec, _| {
+            for value in rand_vec.iter_mut() {
+                *value = *value * rand_ele; // store the product so the timed work is not dead code
+            }
+        })
+    });
+    println!("num_mul = {}, time_mul = {}", num_mul, time_mul);
+
     let (time_kate_div, _) = measure_elapsed_time(|| kate_division(&rand_vec, rand_ele));
     println!(
         "num_kate_div = {}, time_kate_div = {}",
         num_kate_div, time_kate_div
     );
 
-    let pt_linear = estimate_add_mul_lag_field_op_time()
-        + estimate_add_mul_field_op_time()
+    let pt_linear = (num_add_lag as f64) * time_add_lag 
+        + (num_mul_lag as f64) * time_mul_lag
+        + (num_add as f64) * time_add
+        + (num_mul as f64) * time_mul
         + (num_kate_div as f64) * time_kate_div;
     println!("pt_linear = {}", pt_linear);
 

From 577f39cc5fbb6630ce217d236b43cd0e70539561 Mon Sep 17 00:00:00 2001
From: spherel <sph6r6.l1u@gmail.com>
Date: Sun, 11 Sep 2022 20:36:52 +0000
Subject: [PATCH 8/9] Change the method to estimate hadamard operations.

---
 halo2_proofs/src/dev/cost_model.rs | 113 ++++++++++++++---------------
 1 file changed, 54 insertions(+), 59 deletions(-)

diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs
index 7f9d66943b..fc002066f6 100644
--- a/halo2_proofs/src/dev/cost_model.rs
+++ b/halo2_proofs/src/dev/cost_model.rs
@@ -2,7 +2,7 @@
 use std::{collections::BTreeMap, fs, io, iter, mem, time::Instant};
 
 use crate::{
-    arithmetic::{eval_polynomial, kate_division, CurveAffine, Engine, Field},
+    arithmetic::{eval_polynomial, kate_division, parallelize, CurveAffine, Engine, Field},
     circuit::{Cell, Layouter, SimpleFloorPlanner},
     multicore,
     plonk::*,
@@ -198,7 +198,7 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
 
     let n = 1 << k as usize;
     let rand_ele = E::Scalar::random(&mut OsRng);
-    let rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele).collect();
+    let rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele.clone()).collect();
     let rand_vec2 = rand_vec.clone();
     let rand_values = domain.lagrange_from_vec(rand_vec.clone());
 
@@ -208,7 +208,8 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     //      fft
     let (time_fft, rand_poly) = measure_elapsed_time(|| domain.lagrange_to_coeff(rand_values));
     //      extended fft
-    let (time_extended_fft, _) = measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
+    let (time_extended_fft, rand_coset) =
+        measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
     //      BTree time cost in lookup argument
     let (time_btree, _) = measure_elapsed_time(|| {
         let mut leftover_table_map: BTreeMap<E::Scalar, u32> =
@@ -226,8 +227,6 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         }
     });
 
-    let num_threads = multicore::current_num_threads();
-
     // Estimate the number of each operation.
     let FuncCount {
         num_fft,
@@ -242,58 +241,6 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         mem_usage,
     } = dummy_proof(&params, &pk, &domain, l);
 
-    let estimate_add_mul_lag_field_op_time = || {
-        let m = (domain.extended_len() + num_threads - 1) / num_threads;
-        let mut a = rand_ele;
-        let mut b = rand_ele;
-        //      m add field ops
-        let (time_add_lag, _) = measure_elapsed_time(|| {
-            for _ in 0..m {
-                a = a + b;
-            }
-            a
-        });
-        //      m mul field ops
-        let (time_mul_lag, _) = measure_elapsed_time(|| {
-            for _ in 0..m {
-                b = a * b;
-            }
-            b
-        });
-        println!(
-            "num_add_lag = {}, time_add_lag = {}",
-            num_add_lag, time_add_lag
-        );
-        println!(
-            "num_mul_lag = {}, time_mul_lag = {}",
-            num_mul_lag, time_mul_lag
-        );
-        (num_add_lag as f64) * time_add_lag + (num_mul_lag as f64) * time_mul_lag
-    };
-
-    let estimate_add_mul_field_op_time = || {
-        let m = ((1 << k) + num_threads - 1) / num_threads;
-        let mut a = rand_ele;
-        let mut b = rand_ele;
-        //      m add field ops
-        let (time_add, _) = measure_elapsed_time(|| {
-            for _ in 0..m {
-                a = a + b;
-            }
-            a
-        });
-        //      m mul field ops
-        let (time_mul, _) = measure_elapsed_time(|| {
-            for _ in 0..m {
-                b = a * b;
-            }
-            b
-        });
-        println!("num_add = {}, time_add = {}", num_add, time_add);
-        println!("num_mul = {}, time_mul = {}", num_mul, time_mul);
-        (num_add as f64) * time_add + (num_mul as f64) * time_mul
-    };
-
     println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
     println!(
         "num_extended_fft = {}, time_extended_fft = {}",
@@ -308,14 +255,62 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         + (num_btree as f64) * time_btree;
     println!("pt_non_linear = {}\n", pt_non_linear);
 
+    let mut rand_ext_vec: Vec<E::Scalar> = (0..domain.extended_len())
+        .map(|_| rand_ele.clone())
+        .collect();
+    let (time_add_lag, _) = measure_elapsed_time(|| {
+        parallelize(&mut rand_ext_vec, |rand_ext_vec, _| {
+            for value in rand_ext_vec.iter_mut() {
+                *value = *value + rand_ele; // store the sum so the timed work is not dead code
+            }
+        })
+    });
+    println!(
+        "num_add_lag = {}, time_add_lag = {}",
+        num_add_lag, time_add_lag
+    );
+
+    let (time_mul_lag, _) = measure_elapsed_time(|| {
+        parallelize(&mut rand_ext_vec, |rand_ext_vec, _| {
+            for value in rand_ext_vec.iter_mut() {
+                *value = *value * rand_ele; // store the product so the timed work is not dead code
+            }
+        })
+    });
+    println!(
+        "num_mul_lag = {}, time_mul_lag = {}",
+        num_mul_lag, time_mul_lag
+    );
+
+    let mut rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele.clone()).collect();
+    let (time_add, _) = measure_elapsed_time(|| {
+        parallelize(&mut rand_vec, |rand_vec, _| {
+            for value in rand_vec.iter_mut() {
+                *value = *value + rand_ele; // store the sum so the timed work is not dead code
+            }
+        })
+    });
+    println!("num_add = {}, time_add = {}", num_add, time_add);
+
+    let (time_mul, _) = measure_elapsed_time(|| {
+        parallelize(&mut rand_vec, |rand_vec, _| {
+            for value in rand_vec.iter_mut() {
+                *value = *value * rand_ele; // store the product so the timed work is not dead code
+            }
+        })
+    });
+    println!("num_mul = {}, time_mul = {}", num_mul, time_mul);
+
     let (time_kate_div, _) = measure_elapsed_time(|| kate_division(&rand_vec, rand_ele));
     println!(
         "num_kate_div = {}, time_kate_div = {}",
         num_kate_div, time_kate_div
     );
 
-    let pt_linear = estimate_add_mul_lag_field_op_time()
-        + estimate_add_mul_field_op_time()
+    let pt_linear = (num_add_lag as f64) * time_add_lag
+        + (num_mul_lag as f64) * time_mul_lag
+        + (num_add as f64) * time_add
+        + (num_mul as f64) * time_mul
         + (num_kate_div as f64) * time_kate_div;
     println!("pt_linear = {}", pt_linear);
 

From 7b638ad7336b23a71a6119c2a94418125e6d8350 Mon Sep 17 00:00:00 2001
From: spherel <sph6r6.l1u@gmail.com>
Date: Fri, 23 Sep 2022 02:46:00 +0000
Subject: [PATCH 9/9] Remove fake pk

---
 halo2_proofs/src/dev/cost_model.rs | 57 ++++++++++--------------------
 1 file changed, 19 insertions(+), 38 deletions(-)

diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs
index fc002066f6..872566e4f0 100644
--- a/halo2_proofs/src/dev/cost_model.rs
+++ b/halo2_proofs/src/dev/cost_model.rs
@@ -6,7 +6,10 @@ use crate::{
     circuit::{Cell, Layouter, SimpleFloorPlanner},
     multicore,
     plonk::*,
-    poly::{commitment::Params, commitment::ParamsVerifier, EvaluationDomain, Rotation},
+    poly::{
+        batch_invert_assigned, commitment::Params, commitment::ParamsVerifier, EvaluationDomain,
+        Rotation,
+    },
     transcript::{Blake2bRead, Blake2bWrite, Challenge255},
 };
 use group::{prime::PrimeCurveAffine, GroupEncoding};
@@ -60,8 +63,7 @@ struct FakeProverQuery {
 
 impl<C: CurveAffine> Evaluator<C> {
     // Returns the number of hadamard addition and product operations.
-    fn fake_evaluate_h(&self, pk: &ProvingKey<C>, l: usize) -> (usize, usize) {
-        let cs = pk.get_vk().get_cs();
+    fn fake_evaluate_h(&self, cs: &ConstraintSystem<C::Scalar>, l: usize) -> (usize, usize) {
         let mut num_mul_lag = 0;
         let mut num_add_lag = 0;
         // All calculations, with cached intermediate results
@@ -131,7 +133,7 @@ impl<C: CurveAffine> Evaluator<C> {
         }
 
         // Lookups
-        let num_lookups = pk.get_vk().get_cs().lookups.len();
+        let num_lookups = cs.lookups.len();
         // a_minus_s
         num_add_lag += num_lookups;
         // value(X) = value(X) * y + l_0(X) * (1 - z(X))
@@ -166,17 +168,13 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     circuit: ConcreteCircuit,
     k: usize,
 ) -> EstimateResult {
-    // Generate small vk & pk
-    let params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(15_u32);
-    let vk = keygen_vk(&params, &circuit).expect("keygen_vk should not fail");
-    let pk = keygen_pk(&params, vk, &circuit).expect("keygen_pk should not fail");
-
-    let l = 1;
-
-    // Initialize the polynomial commitment parameters
-    let cs = pk.get_vk().get_cs();
+    // Initialize the polynomial commitment parameters for small k
+    let small_params: Params<E::G1Affine> = Params::<E::G1Affine>::unsafe_setup::<E>(15_u32);
+    // Run keygen_vk to synthesize the circuit and convert simple selectors into fixed columns.
+    let vk = keygen_vk(&small_params, &circuit).expect("keygen_vk should not fail");
+    let cs = vk.get_cs();
 
-    let generate_fake_params = |k| {
+    let params = {
         let s = E::Scalar::random(OsRng);
         let rand_c1 = <E::G1Affine as PrimeCurveAffine>::generator() * s;
         let rand_c2 = <E::G2Affine as PrimeCurveAffine>::generator() * s;
@@ -191,11 +189,10 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         }
     };
 
-    let params = generate_fake_params(k);
-
     // Initialize the domain
-    let domain = EvaluationDomain::new(cs.degree() as u32, params.k);
+    let domain = EvaluationDomain::new(cs.degree() as u32, k as u32);
 
+    let l = 1; // The number of instances
     let n = 1 << k as usize;
     let rand_ele = E::Scalar::random(&mut OsRng);
     let rand_vec: Vec<E::Scalar> = (0..n).map(|_| rand_ele.clone()).collect();
@@ -208,8 +205,7 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
     //      fft
     let (time_fft, rand_poly) = measure_elapsed_time(|| domain.lagrange_to_coeff(rand_values));
     //      extended fft
-    let (time_extended_fft, rand_coset) =
-        measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
+    let (time_extended_fft, _) = measure_elapsed_time(|| domain.coeff_to_extended(rand_poly));
     //      BTree time cost in lookup argument
     let (time_btree, _) = measure_elapsed_time(|| {
         let mut leftover_table_map: BTreeMap<E::Scalar, u32> =
@@ -239,7 +235,7 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
         num_add_lag,
         num_mul_lag,
         mem_usage,
-    } = dummy_proof(&params, &pk, &domain, l);
+    } = dummy_proof(&params, &cs, &domain, l);
 
     println!("num_fft = {}, time_fft = {}", num_fft, time_fft);
     println!(
@@ -326,19 +322,6 @@ pub fn estimate<E: Engine, ConcreteCircuit: Circuit<E::Scalar>>(
 
     let prover_time = pt_non_linear + pt_linear + pt_random;
 
-    // let calc_linear_term = |x_1: f64, y_1: f64, x_2: f64, y_2: f64, x_3 :f64| {
-    //     y_1 + (y_2 - y_1) / (x_2 - x_1) * (x_3 - x_1)
-    // };
-
-    // let mem_usage2 = calc_linear_term(
-    //     (1 << res_1.k) as f64, res_1.mem_usage,
-    //     (1 << res_2.k) as f64, res_2.mem_usage,
-    //     (1 << k) as f64,
-    // );
-    // println!("mem_usage by linear regression = {}", mem_usage2);
-
-    // calculate aggregate_circuit_size
-
     EstimateResult {
         prover_time,
         mem_usage: (mem_usage as f64) / 1024.0, // to KB
@@ -385,7 +368,7 @@ struct FuncCount {
 
 fn dummy_proof<C: CurveAffine>(
     params: &Params<C>,
-    pk: &ProvingKey<C>,
+    cs: &ConstraintSystem<C::Scalar>,
     domain: &EvaluationDomain<C::Scalar>,
     l: usize, // The number of input.
 ) -> FuncCount {
@@ -397,8 +380,6 @@ fn dummy_proof<C: CurveAffine>(
     let mut num_mul = 0_usize;
     let mut num_kate_div = 0_usize;
 
-    let cs = pk.get_vk().get_cs();
-
     // (instance, advice) calculate (poly, coset, commitment)
 
     // ins_commit, pt += l * n_ins * commit_lagrange_t
@@ -454,13 +435,13 @@ fn dummy_proof<C: CurveAffine>(
     num_msm += num_h_pieces;
 
     // Evaluate h.
-    let (num_add_lag, num_mul_lag) = pk.get_ev().fake_evaluate_h(pk, l);
+    let ev = Evaluator::<C>::new(cs);
+    let (num_add_lag, num_mul_lag) = ev.fake_evaluate_h(cs, l);
 
     // Estimate multiopen(gwc).
     //      commit: h_x, h_x
     //      The evaluations in multiopen is too small.
     // Initialize the query sets.
-    let cs = pk.get_vk().get_cs();
     let queries = (0..l)
         .flat_map(|_| {
             iter::empty()