diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml
index 68291c0985..a21d163983 100644
--- a/.github/workflows/pr-tests.yml
+++ b/.github/workflows/pr-tests.yml
@@ -17,6 +17,7 @@ env:
   CARGO_TERM_COLOR: always
   POWDR_GENERATE_PROOFS: "true"
   MAX_DEGREE_LOG: "20"
+  RUST_MIN_STACK: "1073741824"
 
 jobs:
   build:
@@ -234,6 +235,8 @@ jobs:
       - name: Install pilcom
         run: git clone https://github.com/0xPolygonHermez/pilcom.git  && cd pilcom && npm install
       - uses: taiki-e/install-action@nextest
+      - name: Increase Stack Size
+        run: ulimit -s 32768 # NOTE(review): each `run` step gets its own shell, so this limit presumably does not reach "Run slow tests" - confirm, and if so move it into that step's script.
       - name: Run slow tests
         # Number threads is set to 2 because the runner does not have enough memory for more.
         run: |
diff --git a/executor/src/witgen/eval_result.rs b/executor/src/witgen/eval_result.rs
index b25fc6724a..30a42955ba 100644
--- a/executor/src/witgen/eval_result.rs
+++ b/executor/src/witgen/eval_result.rs
@@ -47,6 +47,8 @@ pub enum IncompleteCause<K = usize> {
     SymbolicEvaluationOfChallenge,
     /// Some knowledge was learnt, but not a concrete value. Example: `Y = X` if we know that `Y` is boolean. We learn that `X` is boolean, but not its exact value.
     NotConcrete,
+    /// The JIT compiler was not able to generate a function that computes a unique witness.
+    JitCompilationFailed,
     Multiple(Vec<IncompleteCause<K>>),
 }
 
diff --git a/executor/src/witgen/jit/block_machine_processor.rs b/executor/src/witgen/jit/block_machine_processor.rs
index 4db4f1438e..4f5f1fe988 100644
--- a/executor/src/witgen/jit/block_machine_processor.rs
+++ b/executor/src/witgen/jit/block_machine_processor.rs
@@ -198,6 +198,32 @@ impl<'a, T: FieldElement> BlockMachineProcessor<'a, T> {
 
         result.code = self.try_ensure_block_shape(result.code, &requested_known)?;
 
+        // Gather every variable that is passed as an argument to some machine call.
+        let needed_machine_call_variables = result
+            .code
+            .iter()
+            .filter_map(|effect| match effect {
+                Effect::MachineCall(_, _, arguments) => Some(arguments),
+                _ => None,
+            })
+            .flat_map(|arguments| arguments.iter())
+            .inspect(|argument| assert!(matches!(argument, Variable::MachineCallParam(_))))
+            .cloned()
+            .collect::<BTreeSet<_>>();
+
+        // Keep an assignment to a machine call parameter only if a machine call
+        // uses that parameter; all other effects are kept unchanged.
+        result.code = result
+            .code
+            .into_iter()
+            .filter(|effect| match effect {
+                Effect::Assignment(variable @ Variable::MachineCallParam(_), _) => {
+                    needed_machine_call_variables.contains(variable)
+                }
+                _ => true,
+            })
+            .collect();
+
         Ok((result, prover_functions))
     }
 
diff --git a/executor/src/witgen/jit/compiler.rs b/executor/src/witgen/jit/compiler.rs
index 4fbdc98eba..94e72e48fb 100644
--- a/executor/src/witgen/jit/compiler.rs
+++ b/executor/src/witgen/jit/compiler.rs
@@ -287,22 +287,7 @@ fn witgen_code<T: FieldElement>(
         .format("\n");
     // We do not store "known" together with the values, because we hope
     // that this way, the optimizer can group them better.
-    let store_known = vars_known
-        .iter()
-        .filter_map(|var| match var {
-            Variable::WitnessCell(cell) => Some(cell),
-            Variable::Param(_)
-            | Variable::FixedCell(_)
-            | Variable::IntermediateCell(_)
-            | Variable::MachineCallParam(_) => None,
-        })
-        .map(|cell| {
-            format!(
-                "    set_known(known, row_offset, {}, {});",
-                cell.row_offset, cell.id
-            )
-        })
-        .format("\n");
+    let store_known = "".to_string();
     format!(
         r#"
 #[no_mangle]
@@ -353,13 +338,26 @@ fn format_effects_inner<T: FieldElement>(
 ) -> String {
     effects
         .iter()
-        .map(|effect| format_effect(effect, is_top_level))
+        // Turn each effect into its code snippet first, then filter.
+        .map(|effect| format_effect(effect, is_top_level))
+        .filter(|code| {
+            // An empty snippet means the effect needs no code of its own.
+            // Skipping it here keeps it from turning into a blank line
+            // once the remaining snippets are joined.
+            !code.is_empty()
+        })
         .join("\n")
 }
 
 fn format_effect<T: FieldElement>(effect: &Effect<T, Variable>, is_top_level: bool) -> String {
     match effect {
-        Effect::Assignment(var, e) => set(var, &format_expression(e), is_top_level, false),
+        Effect::Assignment(var, e) => {
+            if matches!(var, Variable::MachineCallParam(_)) {
+                String::new() // No code is emitted for machine call parameters.
+            } else {
+                set(var, &format_expression(e), is_top_level, false)
+            }
+        }
         Effect::RangeConstraint(..) => {
             unreachable!("Final code should not contain pure range constraints.")
         }
@@ -403,9 +401,10 @@ fn format_effect<T: FieldElement>(effect: &Effect<T, Variable>, is_top_level: bo
                     .to_string()
                     + "\n"
             };
-            format!(
-                "{var_decls}assert!(call_machine(mutable_state, {id}.into(), MutSlice::from((&mut [{args}]).as_mut_slice())));"
-            )
+            // format!(
+            //     "{var_decls}assert!(call_machine(mutable_state, {id}.into(), MutSlice::from((&mut [{args}]).as_mut_slice())));"
+            // )
+            format!("{var_decls}// Skipping machine call")
         }
         Effect::ProverFunctionCall(ProverFunctionCall {
             targets,
@@ -422,8 +421,9 @@ fn format_effect<T: FieldElement>(effect: &Effect<T, Variable>, is_top_level: bo
                 .enumerate()
                 .map(|(i, v)| set(v, &format!("result[{i}]"), is_top_level, false))
                 .format("\n");
-            let block = format!("{function_call}\n{store_results}");
-            format!("{{\n{}\n}}", indent(block, 1))
+            let block = format!("{}\n{}", function_call, store_results);
+            // format!("{{\n{}\n}}", indent(block, 1))
+            "// Skipping prover function".to_string()
         }
         Effect::Branch(condition, first, second) => {
             let var_decls = if is_top_level {
diff --git a/executor/src/witgen/jit/function_cache.rs b/executor/src/witgen/jit/function_cache.rs
index 633850d854..c564a5a8ae 100644
--- a/executor/src/witgen/jit/function_cache.rs
+++ b/executor/src/witgen/jit/function_cache.rs
@@ -36,8 +36,8 @@ pub struct FunctionCache<'a, T: FieldElement> {
     /// The processor that generates the JIT code
     processor: BlockMachineProcessor<'a, T>,
     /// The cache of JIT functions and the returned range constraints.
-    /// If the entry is None, we attempted to generate the function but failed.
-    witgen_functions: HashMap<CacheKey<T>, Option<CacheEntry<T>>>,
+    /// If the entry is Err, we attempted to generate the function but failed.
+    witgen_functions: HashMap<CacheKey<T>, Result<CacheEntry<T>, CompilationError>>,
     column_layout: ColumnLayout,
     block_size: usize,
     machine_name: String,
@@ -49,6 +49,12 @@ pub struct CacheEntry<T: FieldElement> {
     pub range_constraints: Vec<RangeConstraint<T>>,
 }
 
+#[derive(Debug)]
+pub enum CompilationError { // Reason why no JIT function exists for a cache key.
+    UnsupportedField, // `T::known_field()` is not a field the JIT is invoked for.
+    Other(String), // Code generation failed; holds the error message.
+}
+
 impl<'a, T: FieldElement> FunctionCache<'a, T> {
     pub fn new(
         fixed_data: &'a FixedData<'a, T>,
@@ -81,7 +87,7 @@ impl<'a, T: FieldElement> FunctionCache<'a, T> {
         bus_id: T,
         known_args: &BitVec,
         known_concrete: Option<(usize, T)>,
-    ) -> &Option<CacheEntry<T>> {
+    ) -> &Result<CacheEntry<T>, CompilationError> {
         // First try the generic version, then the specific.
         let mut key = CacheKey {
             bus_id,
@@ -89,7 +95,7 @@ impl<'a, T: FieldElement> FunctionCache<'a, T> {
             known_concrete: None,
         };
 
-        if self.ensure_cache(can_process.clone(), &key).is_none() && known_concrete.is_some() {
+        if self.ensure_cache(can_process.clone(), &key).is_err() && known_concrete.is_some() {
             key = CacheKey {
                 bus_id,
                 known_args: known_args.clone(),
@@ -104,7 +110,7 @@ impl<'a, T: FieldElement> FunctionCache<'a, T> {
         &mut self,
         can_process: impl CanProcessCall<T>,
         cache_key: &CacheKey<T>,
-    ) -> &Option<CacheEntry<T>> {
+    ) -> &Result<CacheEntry<T>, CompilationError> {
         if !self.witgen_functions.contains_key(cache_key) {
             record_start("Auto-witgen code derivation");
             let f = match T::known_field() {
@@ -112,7 +118,7 @@ impl<'a, T: FieldElement> FunctionCache<'a, T> {
                 Some(KnownField::GoldilocksField) => {
                     self.compile_witgen_function(can_process, cache_key)
                 }
-                _ => None,
+                _ => Err(CompilationError::UnsupportedField),
             };
             assert!(self.witgen_functions.insert(cache_key.clone(), f).is_none());
             record_end("Auto-witgen code derivation");
@@ -124,7 +130,7 @@ impl<'a, T: FieldElement> FunctionCache<'a, T> {
         &self,
         can_process: impl CanProcessCall<T>,
         cache_key: &CacheKey<T>,
-    ) -> Option<CacheEntry<T>> {
+    ) -> Result<CacheEntry<T>, CompilationError> {
         log::info!(
             "Compiling JIT function for\n  Machine: {}\n  Connection: {}\n   Inputs: {:?}{}",
             self.machine_name,
@@ -151,13 +157,9 @@ impl<'a, T: FieldElement> FunctionCache<'a, T> {
                 cache_key.known_concrete,
             )
             .map_err(|e| {
-                // These errors can be pretty verbose and are quite common currently.
-                log::info!(
-                    "=> Error generating JIT code: {}\n...",
-                    e.to_string().lines().take(5).join("\n")
-                );
-            })
-            .ok()?;
+                log::info!("{e}");
+                CompilationError::Other(e)
+            })?;
 
         log::info!("=> Success!");
         let out_of_bounds_vars = code
@@ -198,7 +200,7 @@ impl<'a, T: FieldElement> FunctionCache<'a, T> {
         .unwrap();
         log::info!("Compilation done.");
 
-        Some(CacheEntry {
+        Ok(CacheEntry {
             function,
             range_constraints,
         })
@@ -223,6 +225,7 @@ impl<'a, T: FieldElement> FunctionCache<'a, T> {
             known_concrete,
         };
 
+        log::trace!("Calling compiled function for {:?}", cache_key); // Per-call message: trace level only.
         self.witgen_functions
             .get(&cache_key)
             .or_else(|| {
@@ -237,6 +240,7 @@ impl<'a, T: FieldElement> FunctionCache<'a, T> {
             .expect("compile_cached() returned false!")
             .function
             .call(self.fixed_data, mutable_state, values, data);
+        log::trace!("Done calling function"); // Per-call message: trace level only.
 
         Ok(true)
     }
diff --git a/executor/src/witgen/jit/includes/interface.rs b/executor/src/witgen/jit/includes/interface.rs
index 68c1349a03..acec0bee50 100644
--- a/executor/src/witgen/jit/includes/interface.rs
+++ b/executor/src/witgen/jit/includes/interface.rs
@@ -2,7 +2,6 @@
 // const column_count: u64 = ...;
 // const first_column_id: u64 = ...;
 
-#[inline]
 fn known_to_slice<'a>(known: *mut u32, len: u64) -> &'a mut [u32] {
     let words_per_row = (column_count + 31) / 32;
     let rows = len / column_count;
@@ -10,14 +9,12 @@ fn known_to_slice<'a>(known: *mut u32, len: u64) -> &'a mut [u32] {
     unsafe { std::slice::from_raw_parts_mut(known, known_len as usize) }
 }
 
-#[inline]
 fn index(global_offset: u64, local_offset: i32, column: u64) -> usize {
     let column = column - first_column_id;
     let row = (global_offset as i64 + local_offset as i64) as u64;
     (row * column_count + column) as usize
 }
 
-#[inline]
 fn index_known(global_offset: u64, local_offset: i32, column: u64) -> (u64, u64) {
     let column = column - first_column_id;
     let row = (global_offset as i64 + local_offset as i64) as u64;
@@ -25,12 +22,10 @@ fn index_known(global_offset: u64, local_offset: i32, column: u64) -> (u64, u64)
     (row * words_per_row + column / 32, column % 32)
 }
 
-#[inline]
 fn get(data: &[FieldElement], global_offset: u64, local_offset: i32, column: u64) -> FieldElement {
     data[index(global_offset, local_offset, column)]
 }
 
-#[inline]
 fn set(
     data: &mut [FieldElement],
     global_offset: u64,
@@ -42,20 +37,17 @@ fn set(
     data[i] = value;
 }
 
-#[inline]
 fn set_known(known: &mut [u32], global_offset: u64, local_offset: i32, column: u64) {
     let (known_idx, known_bit) = index_known(global_offset, local_offset, column);
     known[known_idx as usize] |= 1 << (known_bit);
 }
 
-#[inline]
 fn get_param(params: &[LookupCell<FieldElement>], i: usize) -> FieldElement {
     match params[i] {
         LookupCell::Input(v) => *v,
         LookupCell::Output(_) => panic!("Output cell used as input"),
     }
 }
-#[inline]
 fn set_param(params: &mut [LookupCell<FieldElement>], i: usize, value: FieldElement) {
     match &mut params[i] {
         LookupCell::Input(_) => panic!("Input cell used as output"),
@@ -78,7 +70,6 @@ pub struct MutSlice<T> {
 }
 
 impl<T> From<&mut [T]> for MutSlice<T> {
-    #[inline]
     fn from(slice: &mut [T]) -> Self {
         MutSlice {
             data: slice.as_mut_ptr(),
@@ -88,7 +79,6 @@ impl<T> From<&mut [T]> for MutSlice<T> {
 }
 
 impl<T> MutSlice<T> {
-    #[inline]
     fn to_mut_slice<'a>(self) -> &'a mut [T] {
         unsafe { std::slice::from_raw_parts_mut(self.data, self.len as usize) }
     }
diff --git a/executor/src/witgen/machines/block_machine.rs b/executor/src/witgen/machines/block_machine.rs
index 04e7d83b57..52f1750cac 100644
--- a/executor/src/witgen/machines/block_machine.rs
+++ b/executor/src/witgen/machines/block_machine.rs
@@ -13,7 +13,7 @@ use crate::witgen::data_structures::caller_data::CallerData;
 use crate::witgen::data_structures::finalizable_data::FinalizableData;
 use crate::witgen::data_structures::mutable_state::MutableState;
 use crate::witgen::global_constraints::RangeConstraintSet;
-use crate::witgen::jit::function_cache::FunctionCache;
+use crate::witgen::jit::function_cache::{CompilationError, FunctionCache};
 use crate::witgen::jit::witgen_inference::CanProcessCall;
 use crate::witgen::processor::{OuterQuery, Processor, SolverState};
 use crate::witgen::range_constraints::RangeConstraint;
@@ -183,8 +183,8 @@ impl<'a, T: FieldElement> Machine<'a, T> for BlockMachine<'a, T> {
             known_arguments,
             fixed_first_input,
         ) {
-            Some(entry) => (true, entry.range_constraints.clone()),
-            None => (false, range_constraints),
+            Ok(entry) => (true, entry.range_constraints.clone()),
+            Err(_) => (false, range_constraints),
         }
     }
 
@@ -454,16 +454,27 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> {
         let fixed_first_input = arguments
             .first()
             .and_then(|a| a.constant_value().map(|v| (0, v)));
-        if self
-            .function_cache
-            .compile_cached(mutable_state, bus_id, &known_inputs, fixed_first_input)
-            .is_some()
-        {
-            let caller_data = CallerData::new(arguments, range_constraints);
-            let updates = self.process_lookup_via_jit(mutable_state, bus_id, caller_data)?;
-            assert!(updates.is_complete());
-            self.block_count_jit += 1;
-            return Ok(updates);
+        match self.function_cache.compile_cached(
+            mutable_state,
+            bus_id,
+            &known_inputs,
+            fixed_first_input,
+        ) {
+            Ok(_) => {
+                let caller_data = CallerData::new(arguments, range_constraints);
+                let updates = self.process_lookup_via_jit(mutable_state, bus_id, caller_data)?;
+                assert!(updates.is_complete());
+                self.block_count_jit += 1;
+                return Ok(updates);
+            }
+            Err(CompilationError::Other(_e)) => {
+                // Assuming the JIT compiler is feature-complete, this means that the witness is not
+                // unique, which could happen e.g. if not all required arguments are provided.
+                return Ok(EvalValue::incomplete(IncompleteCause::JitCompilationFailed));
+            }
+            // If we're on an unsupported field, this won't be fixed in future invocations.
+            // Fall back to run-time witgen.
+            Err(CompilationError::UnsupportedField) => {}
         }
 
         let outer_query = OuterQuery::new(
diff --git a/jit-compiler/src/compiler.rs b/jit-compiler/src/compiler.rs
index 517b774637..4ad0018112 100644
--- a/jit-compiler/src/compiler.rs
+++ b/jit-compiler/src/compiler.rs
@@ -84,7 +84,7 @@ fn cargo_toml(opt_level: Option<u32>) -> String {
     }
 }
 
-const DEBUG: bool = false;
+const DEBUG: bool = true;
 
 /// Compiles the given code and returns the path to the
 /// temporary directory containing the compiled library
diff --git a/jit-compiler/src/includes/field_generic_up_to_64.rs b/jit-compiler/src/includes/field_generic_up_to_64.rs
index 1ac6d52580..bc26f9a169 100644
--- a/jit-compiler/src/includes/field_generic_up_to_64.rs
+++ b/jit-compiler/src/includes/field_generic_up_to_64.rs
@@ -14,20 +14,18 @@ impl std::fmt::Display for FieldElement {
 }
 
 impl From<IntType> for FieldElement {
-    #[inline]
     fn from(i: IntType) -> Self {
         Self(i)
     }
 }
 impl From<FieldElement> for IntType {
-    #[inline]
     fn from(f: FieldElement) -> Self {
         f.0
     }
 }
 impl std::ops::Add for FieldElement {
     type Output = Self;
-    #[inline]
+
     fn add(self, b: Self) -> Self {
         // TODO this is inefficient.
         Self(
@@ -40,7 +38,7 @@ impl std::ops::Add for FieldElement {
 }
 impl std::ops::Sub for FieldElement {
     type Output = Self;
-    #[inline]
+
     fn sub(self, b: Self) -> Self {
         // TODO this is inefficient.
         Self(
@@ -54,7 +52,7 @@ impl std::ops::Sub for FieldElement {
 }
 impl std::ops::Neg for FieldElement {
     type Output = Self;
-    #[inline]
+
     fn neg(self) -> Self {
         if self.0 == 0 {
             self
@@ -65,7 +63,7 @@ impl std::ops::Neg for FieldElement {
 }
 impl std::ops::Mul<FieldElement> for FieldElement {
     type Output = Self;
-    #[inline]
+
     fn mul(self, b: FieldElement) -> FieldElement {
         // TODO this is inefficient.
         Self(
@@ -78,7 +76,7 @@ impl std::ops::Mul<FieldElement> for FieldElement {
 }
 impl std::ops::Div<FieldElement> for FieldElement {
     type Output = Self;
-    #[inline]
+
     fn div(self, b: FieldElement) -> FieldElement {
         if b.0 == 0 {
             panic!("Division by zero");
@@ -99,7 +97,7 @@ impl std::ops::Div<FieldElement> for FieldElement {
         }
     }
 }
-#[inline]
+
 fn try_integer_div_without_remainder(a: IntType, b: IntType) -> Option<IntType> {
     (a % b == 0).then(|| a / b)
 }
@@ -108,20 +106,20 @@ fn full_field_div(_: FieldElement, _: FieldElement) -> FieldElement {
     // TODO generate the algorithm we use for goldilocks
     // for a generic prime field.
 }
-#[inline]
+
 fn integer_div(a: FieldElement, b: FieldElement) -> FieldElement {
     FieldElement(a.0 / b.0)
 }
 impl std::ops::BitAnd<FieldElement> for FieldElement {
     type Output = Self;
-    #[inline]
+
     fn bitand(self, b: FieldElement) -> FieldElement {
         Self(self.0 & b.0)
     }
 }
 impl std::ops::BitOr<FieldElement> for FieldElement {
     type Output = Self;
-    #[inline]
+
     fn bitor(self, b: FieldElement) -> FieldElement {
         Self(self.0 | b.0)
     }
diff --git a/jit-compiler/src/includes/field_goldilocks.rs b/jit-compiler/src/includes/field_goldilocks.rs
index 4267af7fa8..fcca51f49d 100644
--- a/jit-compiler/src/includes/field_goldilocks.rs
+++ b/jit-compiler/src/includes/field_goldilocks.rs
@@ -73,19 +73,16 @@ impl GoldilocksField {
         res
     }
 
-    #[inline(always)]
     fn from_canonical_u64(n: u64) -> Self {
         debug_assert!(n < Self::ORDER);
         Self(n)
     }
 
-    #[inline]
     fn to_canonical_u64(self) -> u64 {
         self.0
     }
 }
 
-#[inline]
 fn wrap(x: u64) -> u64 {
     if x >= GoldilocksField::ORDER {
         x - GoldilocksField::ORDER
@@ -97,7 +94,6 @@ fn wrap(x: u64) -> u64 {
 impl std::ops::Neg for GoldilocksField {
     type Output = Self;
 
-    #[inline]
     fn neg(self) -> Self {
         if self.0 == 0 {
             self
@@ -110,7 +106,6 @@ impl std::ops::Neg for GoldilocksField {
 impl std::ops::Add for GoldilocksField {
     type Output = Self;
 
-    #[inline]
     #[allow(clippy::suspicious_arithmetic_impl)]
     fn add(self, rhs: Self) -> Self {
         let (sum, over) = self.0.overflowing_add(rhs.0);
@@ -123,7 +118,6 @@ impl std::ops::Add for GoldilocksField {
 impl std::ops::Sub for GoldilocksField {
     type Output = Self;
 
-    #[inline]
     #[allow(clippy::suspicious_arithmetic_impl)]
     fn sub(self, rhs: Self) -> Self {
         let (diff, under) = self.0.overflowing_sub(rhs.0);
@@ -171,7 +165,6 @@ impl std::ops::Div for GoldilocksField {
     }
 }
 
-#[inline]
 fn try_integer_div_without_remainder(a: u64, b: u64) -> Option<u64> {
     (a % b == 0).then(|| a / b)
 }
@@ -185,7 +178,7 @@ fn full_field_div(a: GoldilocksField, b: GoldilocksField) -> GoldilocksField {
 ///   - It is only correct if x + y < 2**64 + ORDER = 0x1ffffffff00000001.
 ///   - It is only faster in some circumstances. In particular, on x86 it overwrites both inputs in
 ///     the registers, so its use is not recommended when either input will be used again.
-#[inline(always)]
+
 #[cfg(target_arch = "x86_64")]
 unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
     let res_wrapped: u64;
@@ -212,7 +205,6 @@ unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
     res_wrapped + adjustment
 }
 
-#[inline(always)]
 #[cfg(not(target_arch = "x86_64"))]
 const unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
     let (res_wrapped, carry) = x.overflowing_add(y);
@@ -221,7 +213,7 @@ const unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
 }
 
 /// Reduces to a 64-bit value. The result is in canonical form.
-#[inline]
+
 fn reduce128(x: u128) -> GoldilocksField {
     let (x_lo, x_hi) = split(x); // This is a no-op
     let x_hi_hi = x_hi >> 32;
@@ -239,17 +231,15 @@ fn reduce128(x: u128) -> GoldilocksField {
 }
 
 /// Squares the base N number of times and multiplies the result by the tail value.
-#[inline(always)]
+
 fn exp_acc<const N: usize>(base: GoldilocksField, tail: GoldilocksField) -> GoldilocksField {
     base.exp_power_of_2(N) * tail
 }
 
-#[inline]
 const fn split(x: u128) -> (u64, u64) {
     (x as u64, (x >> 64) as u64)
 }
 
-#[inline(always)]
 #[cfg(target_arch = "x86_64")]
 fn assume(p: bool) {
     debug_assert!(p);
@@ -268,7 +258,7 @@ fn assume(p: bool) {
 ///         y = bar();
 ///     }
 /// This function has no semantics. It is a hint only.
-#[inline(always)]
+
 fn branch_hint() {
     // NOTE: These are the currently supported assembly architectures. See the
     // [nightly reference](https://doc.rust-lang.org/nightly/reference/inline-assembly.html) for
@@ -287,14 +277,12 @@ fn branch_hint() {
 }
 
 impl From<u64> for GoldilocksField {
-    #[inline]
     fn from(n: u64) -> Self {
         Self(wrap(n))
     }
 }
 
 impl From<FieldElement> for IntType {
-    #[inline]
     fn from(f: FieldElement) -> Self {
         f.0
     }
@@ -306,21 +294,20 @@ impl std::fmt::Display for GoldilocksField {
     }
 }
 
-#[inline]
 fn integer_div(a: GoldilocksField, b: GoldilocksField) -> GoldilocksField {
     GoldilocksField(a.0 / b.0)
 }
 
 impl std::ops::BitAnd<u64> for GoldilocksField {
     type Output = Self;
-    #[inline]
+
     fn bitand(self, b: u64) -> GoldilocksField {
         Self(self.0 & b)
     }
 }
 impl std::ops::BitOr<GoldilocksField> for GoldilocksField {
     type Output = Self;
-    #[inline]
+
     fn bitor(self, b: GoldilocksField) -> GoldilocksField {
         Self(self.0 | b.0)
     }
diff --git a/std/machines/hash/keccakf32_memory.asm b/std/machines/hash/keccakf32_memory.asm
index 3bc1507a47..e23b0fb235 100644
--- a/std/machines/hash/keccakf32_memory.asm
+++ b/std/machines/hash/keccakf32_memory.asm
@@ -596,7 +596,7 @@ machine Keccakf32Memory(mem: Memory) with
 
     query |row| compute_from_multi(
         c, row, a,
-        |a_fe| array::new(array::len(c), |i| {
+        |a_fe| array::new(5 * 64, |i| {
             let x = i / 64;
             let z = i % 64;
             let limb = z / 32;
@@ -628,7 +628,7 @@ machine Keccakf32Memory(mem: Memory) with
 
     query |row| compute_from_multi(
         a_prime, row, a + c + c_prime,
-        |inputs| array::new(array::len(a_prime), |i| {
+        |inputs| array::new(5 * 5 * 64, |i| {
             let y = i / 320;
             let x = (i / 64) % 5;
             let z = i % 64;
@@ -642,16 +642,4 @@ machine Keccakf32Memory(mem: Memory) with
             fe(((int(a_elem) >> bit_in_limb) & 0x1) ^ int(c_elem) ^ int(c_prime_elem))
         }));
 
-    // TODO: This hint is correct but not needed (the solver can figure this out).
-    // We keep it here because it prevents the JIT solver from succeeding (because of the
-    // use of `provide_value`), because it currently fails when compiling Rust code.
-    // Once these issues are resolved, we can remove this hint.
-    query |row| {
-        std::prover::provide_value(
-            a_prime_prime_0_0_bits[0], 
-            row, 
-            fe((int(eval(a_prime_prime[0]))) & 0x1)
-        );
-    };
-
 }
diff --git a/std/machines/large_field/arith.asm b/std/machines/large_field/arith.asm
index 2a85e7b757..760ec8fca9 100644
--- a/std/machines/large_field/arith.asm
+++ b/std/machines/large_field/arith.asm
@@ -138,12 +138,12 @@ machine Arith with
     
     let combine: expr[] -> expr[] = |x| array::new(array::len(x) / 2, |i| x[2 * i + 1] * 2**16 + x[2 * i]);
     // Intermediate polynomials, arrays of 8 columns, 32 bit per column.
-    col x1c[8] = combine(x1);
-    col y1c[8] = combine(y1);
-    col x2c[8] = combine(x2);
-    col y2c[8] = combine(y2);
-    col x3c[8] = combine(x3);
-    col y3c[8] = combine(y3);
+    let x1c = combine(x1);
+    let y1c = combine(y1);
+    let x2c = combine(x2);
+    let y2c = combine(y2);
+    let x3c = combine(x3);
+    let y3c = combine(y3);
 
     let CLK32: col[32] = array::new(32, |i| |row| if row % 32 == i { 1 } else { 0 });
     let CLK32_31: expr = CLK32[31];
@@ -346,11 +346,11 @@ machine Arith with
     // TODO: To reduce the degree of the constraints, these intermediate columns should be materialized.
     // However, witgen doesn't work currently if we do, likely because for some operations, not all inputs are
     // available.
-    col eq0_sum = sum(32, |i| eq0(i) * CLK32[i]);
-    col eq1_sum = sum(32, |i| eq1(i) * CLK32[i]);
-    col eq2_sum = sum(32, |i| eq2(i) * CLK32[i]);
-    col eq3_sum = sum(32, |i| eq3(i) * CLK32[i]);
-    col eq4_sum = sum(32, |i| eq4(i) * CLK32[i]);
+    let eq0_sum = sum(32, |i| eq0(i) * CLK32[i]);
+    let eq1_sum = sum(32, |i| eq1(i) * CLK32[i]);
+    let eq2_sum = sum(32, |i| eq2(i) * CLK32[i]);
+    let eq3_sum = sum(32, |i| eq3(i) * CLK32[i]);
+    let eq4_sum = sum(32, |i| eq4(i) * CLK32[i]);
 
     selEq[0] * (eq0_sum + carry[0]) = selEq[0] * carry[0]' * 2**16;
     selEq[1] * (eq1_sum + carry[0]) = selEq[1] * carry[0]' * 2**16;
diff --git a/std/machines/large_field/arith256_memory.asm b/std/machines/large_field/arith256_memory.asm
index 51dcc3abe7..f452f11e3b 100644
--- a/std/machines/large_field/arith256_memory.asm
+++ b/std/machines/large_field/arith256_memory.asm
@@ -286,12 +286,12 @@ machine Arith256Memory(mem: Memory) with
 
     let combine: expr[] -> expr[] = |x| array::new(array::len(x) / 2, |i| x[2 * i + 1] * 2**16 + x[2 * i]);
     // Intermediate polynomials, arrays of 8 columns, 32 bit per column.
-    col x1c[8] = combine(x1);
-    col y1c[8] = combine(y1);
-    col x2c[8] = combine(x2);
-    col y2c[8] = combine(y2);
-    col x3c[8] = combine(x3);
-    col y3c[8] = combine(y3);
+    let x1c = combine(x1);
+    let y1c = combine(y1);
+    let x2c = combine(x2);
+    let y2c = combine(y2);
+    let x3c = combine(x3);
+    let y3c = combine(y3);
 
     let CLK32: col[32] = array::new(32, |i| |row| if row % 32 == i { 1 } else { 0 });
     let CLK32_31: expr = CLK32[31];
@@ -501,11 +501,11 @@ machine Arith256Memory(mem: Memory) with
     // TODO: To reduce the degree of the constraints, these intermediate columns should be materialized.
     // However, witgen doesn't work currently if we do, likely because for some operations, not all inputs are
     // available.
-    col eq0_sum = sum(32, |i| eq0(i) * CLK32[i]);
-    col eq1_sum = sum(32, |i| eq1(i) * CLK32[i]);
-    col eq2_sum = sum(32, |i| eq2(i) * CLK32[i]);
-    col eq3_sum = sum(32, |i| eq3(i) * CLK32[i]);
-    col eq4_sum = sum(32, |i| eq4(i) * CLK32[i]);
+    let eq0_sum = sum(32, |i| eq0(i) * CLK32[i]);
+    let eq1_sum = sum(32, |i| eq1(i) * CLK32[i]);
+    let eq2_sum = sum(32, |i| eq2(i) * CLK32[i]);
+    let eq3_sum = sum(32, |i| eq3(i) * CLK32[i]);
+    let eq4_sum = sum(32, |i| eq4(i) * CLK32[i]);
 
     selEq[0] * (eq0_sum + carry[0]) = selEq[0] * carry[0]' * 2**16;
     selEq[1] * (eq1_sum + carry[0]) = selEq[1] * carry[0]' * 2**16;