diff --git a/cprover_bindings/src/goto_program/location.rs b/cprover_bindings/src/goto_program/location.rs index 9a2a046f6387..f4e144de5ad0 100644 --- a/cprover_bindings/src/goto_program/location.rs +++ b/cprover_bindings/src/goto_program/location.rs @@ -124,6 +124,20 @@ impl Location { } } + /// Tries to set the `function` field of a [Location::Loc], but returns `None` if the location + /// is of a different variant and the field couldn't be set. + pub fn try_set_function(&mut self, new_function: Option) -> Option<()> { + if let Location::Loc { function, .. } = self { + if let Some(new_function) = new_function { + *function = Some(new_function); + } + + Some(()) + } else { + None + } + } + /// Create a Property type Location pub fn property_location( file: Option, diff --git a/kani-compiler/src/codegen_cprover_gotoc/codegen/cache/impl_no_stats.rs b/kani-compiler/src/codegen_cprover_gotoc/codegen/cache/impl_no_stats.rs new file mode 100644 index 000000000000..3d5964558069 --- /dev/null +++ b/kani-compiler/src/codegen_cprover_gotoc/codegen/cache/impl_no_stats.rs @@ -0,0 +1,73 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A core implementation of a unified codegen cache. + +use crate::codegen_cprover_gotoc::codegen::cache::{CACHE, CacheEntry, CodegenCacheVal}; + +#[macro_export] +macro_rules! implement_cache { + ($name:tt -- $($(@$global:tt)? [$field_name:tt] $key:path => $val:path),+) => { + #[derive(Default)] + /// The unified codegen cache used to store codegen work. + pub struct $name { + $($field_name: HashImpl<$key, $val>,)* + } + + /// Reset the thread local [CodegenCache] between harnesses. + /// Fields marked with `@global` won't be cleared. + pub fn clear_codegen_cache() { + CACHE.with_borrow_mut(|cache|{ + $( + $crate::clear_cache_field!($($global)? cache, $val); + )* + }) + } + + // Implement [CodegenCacheVal] for all of the provided cache values, so that they can be accessed + // in the cache. + $( + impl CodegenCacheVal for $val { + type Key = $key; + fn get_individual_cache(cache: &CodegenCache) -> &HashImpl { + &cache.$field_name + } + fn get_individual_cache_mut(cache: &mut CodegenCache) -> &mut HashImpl { + &mut cache.$field_name + } + } + )* + }; +} + +pub struct NoStatsEntry(Option, V::Key); + +pub fn cache_entry_impl(key: V::Key) -> NoStatsEntry { + let found_value = CACHE.with_borrow(|cache| V::get_individual_cache(cache).get(&key).cloned()); + NoStatsEntry(found_value, key) +} + +impl CacheEntry for NoStatsEntry { + type EntryVal = V; + + fn tweak(mut self, f: F) -> Self { + if let Some(found_val) = &mut self.0 { + f(found_val) + } + + self + } + + fn or_insert_with V>(self, f: F) -> V { + match self.0 { + Some(cached) => cached, + None => { + let calculated = f(); + CACHE.with_borrow_mut(|cache| { + V::get_individual_cache_mut(cache).insert(self.1, calculated.clone()) + }); + calculated + } + } + } +} diff --git a/kani-compiler/src/codegen_cprover_gotoc/codegen/cache/impl_stats.rs b/kani-compiler/src/codegen_cprover_gotoc/codegen/cache/impl_stats.rs new file mode 100644 index 000000000000..6503d1d29aa4 --- /dev/null +++ b/kani-compiler/src/codegen_cprover_gotoc/codegen/cache/impl_stats.rs @@ -0,0 +1,210 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A codegen cache implementation that is functionally equivalent to that of `impl_no_stats.rs`, but also +//! records cache hits, misses, and the amount of wall clock time spent on each for debugging analysis. + +use std::time::Duration; + +use crate::codegen_cprover_gotoc::codegen::cache::{ + CACHE, CacheEntry, CodegenCacheVal, impl_stats::stats::CacheTime, +}; + +#[macro_export] +macro_rules! implement_cache { + ($name:tt -- $($(@$global:tt)? [$field_name:tt] $key:path => $val:path),+) => { + #[derive(Default)] + /// The unified codegen cache used to store codegen work. + pub struct $name { + $($field_name: (HashImpl<$key, $val>, impl_stats::stats::CacheStats),)* + } + + /// Reset the thread local [CodegenCache] between harnesses. + /// Fields marked with `@global` won't be cleared. + pub fn clear_codegen_cache() { + CACHE.with_borrow_mut(|cache|{ + $( + $crate::clear_cache_field!($($global)? cache, $val); + )* + }) + } + + impl Drop for $name { + fn drop(&mut self) { + if std::env::var("CACHE_STATS").is_ok() { + self.print_stats(); + } + } + } + + impl $name { + fn print_stats(&self) { + println!("\n***CACHE STATS***"); + let mut all_stats = Vec::new(); + $( + let name = std::any::type_name::<$val>(); + let stats: &impl_stats::stats::CacheStats = &self.$field_name.1; + println!("{name}: {:?}", stats); + all_stats.push(stats); + )* + let (hits, total) = impl_stats::stats::total_hits_and_queries(all_stats.into_iter()); + let hit_rate = hits as f64 / total as f64 * 100_f64; + println!("TOTAL: {hits} hits / {total} queries ({hit_rate:.2?}%)\n"); + } + } + + // Implement [CodegenCacheVal] for all of the provided values, so that they can be accessed + // in the cache. Since we are recording stats, implement that version of the trait. + $( + impl CodegenCacheVal for $val { + type Key = $key; + fn get_individual_cache(cache: &CodegenCache) -> &HashImpl { + &cache.$field_name.0 + } + fn get_individual_cache_mut(cache: &mut CodegenCache) -> &mut HashImpl { + &mut cache.$field_name.0 + } + fn get_individual_stats_mut(cache: &mut CodegenCache) -> &mut impl_stats::stats::CacheStats { + &mut cache.$field_name.1 + } + } + )* + }; +} + +// Stores a result for the value, or the time wasted already on a miss... +pub struct StatsEntry(Result, V::Key); + +pub fn cache_entry_impl(key: V::Key) -> StatsEntry { + let start = std::time::Instant::now(); + let found_value = CACHE.with_borrow(|cache| V::get_individual_cache(cache).get(&key).cloned()); + let query_time = start.elapsed(); + match found_value { + Some(hit_val) => { + insert_cache_timing::(CacheTime::Hit(query_time)); + StatsEntry(Ok(hit_val), key) + } + None => StatsEntry(Err(query_time), key), + } +} + +fn insert_cache_timing(time: CacheTime) { + CACHE.with_borrow_mut(|cache| { + let stats = V::get_individual_stats_mut(cache); + stats.add_time(time); + }) +} + +impl CacheEntry for StatsEntry { + type EntryVal = V; + + fn tweak(mut self, f: F) -> Self { + if let Ok(found_val) = &mut self.0 { + f(found_val) + } + + self + } + + fn or_insert_with V>(self, f: F) -> V { + match self.0 { + Ok(cached_val) => cached_val, + Err(miss_time_already) => { + let start = std::time::Instant::now(); + let calculated = f(); + let calc_time = start.elapsed(); + + CACHE.with_borrow_mut(|cache| { + V::get_individual_cache_mut(cache).insert(self.1, calculated.clone()) + }); + let total_time = start.elapsed() + miss_time_already; + + insert_cache_timing::(CacheTime::Miss { calc_time, total_time }); + + calculated + } + } + } +} + +/// Utilites for recording cache statistics. +pub(crate) mod stats { + use std::time::Duration; + + #[derive(Default, Clone)] + pub(crate) struct CacheStats { + /// The end to end time taken to return a result for each cache hit. + hit_times: Vec, + /// The end to end time taken to return a result for each cache miss. + miss_times: Vec, + /// The time taken to calculate a new entry's value on each cache miss + /// (essentially the miss time minus the time spent inserting the new value into the cache). + calc_times: Vec, + } + + impl CacheStats { + /// The number of queries to this cache which hit. + pub fn hits(&self) -> usize { + self.hit_times.len() + } + + /// The number of queries to this cache which missed. + pub fn misses(&self) -> usize { + // Since each miss should contain a calculations, the number of misses + // should always be equal to the number of calculations. + assert_eq!(self.calc_times.len(), self.miss_times.len()); + self.miss_times.len() + } + + pub fn add_time(&mut self, time: CacheTime) { + match time { + CacheTime::Hit(time) => { + self.hit_times.push(time); + } + CacheTime::Miss { calc_time, total_time } => { + self.calc_times.push(calc_time); + self.miss_times.push(total_time); + } + } + } + } + + pub enum CacheTime { + Hit(Duration), + Miss { calc_time: Duration, total_time: Duration }, + } + + impl std::fmt::Debug for CacheStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let (hits, total) = (self.hits(), self.hits() + self.misses()); + let hit_rate = hits as f64 / total as f64 * 100_f64; + write!(f, " {hits} hits / {total} queries ({hit_rate:.2?}%)")?; + + if self.hits() + self.misses() != 0 { + write!( + f, + "- avg hit time: {:?}, avg miss time: {:?} (of which {:?} was calc)", + avg_duration(&self.hit_times), + avg_duration(&self.miss_times), + avg_duration(&self.calc_times) + )?; + } + + std::fmt::Result::Ok(()) + } + } + + fn avg_duration(durations: &[Duration]) -> Duration { + let len = durations.len() as u32; + let sum: Duration = durations.iter().sum(); + sum / len + } + + pub(crate) fn total_hits_and_queries<'a>( + stats: impl Iterator, + ) -> (usize, usize) { + stats.fold(<(usize, usize) as Default>::default(), |acc, stats| { + (acc.0 + stats.hits(), acc.1 + stats.misses() + stats.hits()) + }) + } +} diff --git a/kani-compiler/src/codegen_cprover_gotoc/codegen/cache/mod.rs b/kani-compiler/src/codegen_cprover_gotoc/codegen/cache/mod.rs new file mode 100644 index 000000000000..b6caf1e08c46 --- /dev/null +++ b/kani-compiler/src/codegen_cprover_gotoc/codegen/cache/mod.rs @@ -0,0 +1,113 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! This module provides a 'unified' cache for Kani's codegen. +//! +//! The cache is 'unified' in the sense that it can cache multiple different pieces of our codegen, +//! but each can be queried and contained in a single struct. +//! +//! The core part of this implementation is the [implement_cache!] macro, which expands to the full +//! struct of the [CodegenCache], and all its needed implementation. In debug mode, this will use +//! the implementation from `impl_stats.rs`, which instruments all queries to capture cache statistics and +//! print them when the [CodegenCache] gets dropped after codegen. In release mode, it will instead use the +//! implementation from `impl_no_stats.rs`, which just has the core cache implementation. +//! +//! To interact with the cache elsewhere in Kani, just call [cache_entry] and pass in a key type from where +//! the cache is defined with the [implement_cache!] macro below. This will return a cache entry for the value type, +//! which you can then call [or_insert_with](CacheEntry::or_insert_with) on to get the cached value, or insert one if +//! none was found. +//! +//! If you wish to add another element to the cache, add another row to the [implement_cache!] macro call below, +//! and this will allow you to use [cache_entry] with that new key type. + +use rustc_data_structures::fx::FxHashMap; +use std::cell::RefCell; +use std::hash::Hash; + +#[cfg(debug_assertions)] +pub mod impl_stats; +#[cfg(debug_assertions)] +pub use impl_stats::cache_entry_impl; + +#[cfg(not(debug_assertions))] +pub mod impl_no_stats; +#[cfg(not(debug_assertions))] +pub use impl_no_stats::cache_entry_impl; + +// This will automatically point to the `implement_cache` macro for whichever implementation +// module is currently cfg-ed in. +use crate::implement_cache; + +thread_local! { + /// The thread-local codegen cache. Since currently codegen is constrainted to be done + /// in a single thread (since the compiler's [TyCtxt](rustc_middle::ty::TyCtxt) isn't `Send`), + /// we only ever need the cache in that one thread. + pub static CACHE: RefCell = RefCell::new(Default::default()); +} + +/// The hashmap implementation used to store data in the cache. +type HashImpl = FxHashMap; + +// Define the actual cache implementation. This will expand to the struct defintion and +// implementation based on whether or not we want to record cache statistics. +// +// Each row of the macro represents the cache for a different type, whose syntax is: +// `@granularity [name_of_field_in_struct] KeyType => ValueType` +// +// See [clear_cache_field] below for how the `@granularity` annotation works. +implement_cache!(CodegenCache -- + [types] rustc_public::ty::Ty => cbmc::goto_program::Type, + @global [spans] rustc_public::ty::Span => cbmc::goto_program::Location +); + +/// Get the cache entry for a specific key. +pub fn cache_entry(key: V::Key) -> impl CacheEntry { + cache_entry_impl(key) +} + +/// The trait for an entry in the cache, that provides the core API without us having to know how +/// the cache is implemented (namely whether stats are being recorded or not). +pub trait CacheEntry { + /// The the of the value that this cache entry holds. + type EntryVal: CodegenCacheVal; + + /// Applies `f` to modify the value found in the cache, if there was one. + fn tweak(self, f: F) -> Self; + + /// Returns the cached value if there was one, or inserts a new value to the cache by calling `f`. + fn or_insert_with Self::EntryVal>(self, f: F) -> Self::EntryVal; +} + +/// A type whose value can be stored in the codegen cache and retrived with a +/// specific corresponding [Key](CodegenCacheVal::Key) type. +pub trait CodegenCacheVal: Clone +where + Self: Sized, +{ + type Key: Hash + Eq; + + /// Gets the underlying [HashImpl] used to cache this type in the unified [CodegenCache]. + fn get_individual_cache(cache: &CodegenCache) -> &HashImpl; + /// Mutably gets the underlying [HashImpl] used to cache this type in the unified [CodegenCache]. + fn get_individual_cache_mut(cache: &mut CodegenCache) -> &mut HashImpl; + + #[cfg(debug_assertions)] + /// Mutably gets the struct used to store statistics on cache performance for this type + /// in the unified [CodegenCache]. + fn get_individual_stats_mut(cache: &mut CodegenCache) -> &mut impl_stats::stats::CacheStats; +} + +#[macro_export] +/// Clears the cache field for a given type. +macro_rules! clear_cache_field { + ( $cache:tt, $field_val:ty) => { + // if no granularity is provided, default to clearing the field per-harness + $crate::clear_cache_field!(per_harness $cache, $field_val); + }; + (per_harness $cache:tt, $field_val:ty) => { + <$field_val>::get_individual_cache_mut($cache).clear(); + }; + (global $cache:tt, $field_val:ty) => { + /* global field, don't clear cache */ + }; +} diff --git a/kani-compiler/src/codegen_cprover_gotoc/codegen/mod.rs b/kani-compiler/src/codegen_cprover_gotoc/codegen/mod.rs index f176cbbb88d2..7e7f006611b8 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/codegen/mod.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/codegen/mod.rs @@ -6,6 +6,7 @@ mod assert; mod block; +pub(crate) mod cache; mod foreign_function; mod function; mod intrinsic; diff --git a/kani-compiler/src/codegen_cprover_gotoc/codegen/span.rs b/kani-compiler/src/codegen_cprover_gotoc/codegen/span.rs index cf0c5e4e376a..fe24db225959 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/codegen/span.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/codegen/span.rs @@ -4,6 +4,7 @@ //! MIR Span related functions use crate::codegen_cprover_gotoc::GotocCtx; +use crate::codegen_cprover_gotoc::codegen::cache::{CacheEntry, cache_entry}; use cbmc::goto_program::Location; use lazy_static::lazy_static; use rustc_hir::Attribute; @@ -37,6 +38,15 @@ impl GotocCtx<'_, '_> { } pub fn codegen_span_stable(&self, sp: SpanStable) -> Location { + cache_entry::(sp) + .tweak(|res| { + res.try_set_function(self.current_fn.as_ref().map(|x| x.interned_readable_name())) + .unwrap(); + }) + .or_insert_with(|| self.codegen_span_stable_inner(sp)) + } + + pub fn codegen_span_stable_inner(&self, sp: SpanStable) -> Location { // Attribute to mark functions as where automatic pointer checks should not be generated. let should_skip_ptr_checks_attr = vec![ rustc_span::symbol::Symbol::intern("kanitool"), @@ -66,6 +76,7 @@ impl GotocCtx<'_, '_> { .leak() // This is to preserve `Location` being Copy, but could blow up the memory utilization of compiler. }; let loc = sp.get_lines(); + Location::new( sp.get_filename().to_string(), self.current_fn.as_ref().map(|x| x.readable_name().to_string()), diff --git a/kani-compiler/src/codegen_cprover_gotoc/codegen/ty_stable.rs b/kani-compiler/src/codegen_cprover_gotoc/codegen/ty_stable.rs index f1ad07f3722d..076a29f22984 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/codegen/ty_stable.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/codegen/ty_stable.rs @@ -7,6 +7,7 @@ //! `typ.rs`. use crate::codegen_cprover_gotoc::GotocCtx; +use crate::codegen_cprover_gotoc::codegen::cache::{CacheEntry, cache_entry}; use crate::kani_middle::abi::LayoutOf; use cbmc::goto_program::Type; use rustc_middle::ty::layout::{LayoutOf as _, TyAndLayout}; @@ -21,7 +22,8 @@ impl<'tcx, 'r> GotocCtx<'tcx, 'r> { } pub fn codegen_ty_stable(&mut self, ty: Ty) -> Type { - self.codegen_ty(rustc_internal::internal(self.tcx, ty)) + cache_entry::(ty) + .or_insert_with(|| self.codegen_ty(rustc_internal::internal(self.tcx, ty))) } pub fn codegen_ty_ref_stable(&mut self, ty: Ty) -> Type { diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs index e7b3fdaf0a2f..0011cf3071c9 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs @@ -6,7 +6,7 @@ use crate::args::ReachabilityType; use crate::codegen_cprover_gotoc::context::MinimalGotocCtx; use crate::codegen_cprover_gotoc::utils::file_writing_pool::{FileDataToWrite, ThreadPool}; -use crate::codegen_cprover_gotoc::{GotocCtx, context}; +use crate::codegen_cprover_gotoc::{GotocCtx, clear_codegen_cache, context}; use crate::kani_middle::analysis; use crate::kani_middle::attributes::KaniAttributes; use crate::kani_middle::check_reachable_items; @@ -404,6 +404,8 @@ impl CodegenBackend for GotocCodegenBackend { BodyTransformation::new(&queries, tcx, unit); for harness in &unit.harnesses { + clear_codegen_cache(); + let model_path = units.harness_model_path(*harness).unwrap(); let is_automatic_harness = units.is_automatic_harness(harness); let contract_metadata = diff --git a/kani-compiler/src/codegen_cprover_gotoc/context/current_fn.rs b/kani-compiler/src/codegen_cprover_gotoc/context/current_fn.rs index 7a2be6b06c5a..cf23b58c2daa 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/context/current_fn.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/context/current_fn.rs @@ -4,11 +4,13 @@ use crate::codegen_cprover_gotoc::GotocCtx; use cbmc::InternedString; use cbmc::goto_program::Stmt; +use rustc_data_structures::fx::FxHashMap; use rustc_middle::ty::Instance as InstanceInternal; use rustc_public::CrateDef; use rustc_public::mir::mono::Instance; use rustc_public::mir::{Body, Local, LocalDecl, Rvalue, visit::Location, visit::MirVisitor}; use rustc_public::rustc_internal; +use std::cell::RefCell; use std::collections::{HashMap, HashSet}; /// This structure represents useful data about the function we are currently compiling. @@ -32,6 +34,9 @@ pub struct CurrentFnCtx<'tcx> { name: String, /// A human readable pretty name for the current function readable_name: String, + /// The interned version of `readable_name`. This allows us to avoid re-interning + /// that string every time we want to use it internally. + interned_readable_name: InternedString, /// A counter to enable creating temporary variables temp_var_counter: u64, } @@ -54,12 +59,23 @@ impl MirVisitor for AddressTakenLocalsCollector { } } +thread_local! { + /// Stores (`name`, `mangled_name`) pairs for each [Instance]. + pub static INSTANCE_NAME_CACHE: RefCell> = RefCell::new(FxHashMap::default()); +} + +/// Returns the (`name`, `mangled_name`) pair for an [Instance] from the cache, computing it if no entry exists. +fn instance_names(instance: &Instance) -> (String, String) { + INSTANCE_NAME_CACHE.with_borrow_mut(|cache| { + cache.entry(*instance).or_insert_with(|| (instance.name(), instance.mangled_name())).clone() + }) +} + /// Constructor impl<'tcx> CurrentFnCtx<'tcx> { pub fn new(instance: Instance, gcx: &GotocCtx<'tcx, '_>, body: &Body) -> Self { + let (readable_name, name) = instance_names(&instance); let instance_internal = rustc_internal::internal(gcx.tcx, instance); - let readable_name = instance.name(); - let name = instance.mangled_name(); let locals = body.locals().to_vec(); let local_names = body .var_debug_info @@ -77,6 +93,7 @@ impl<'tcx> CurrentFnCtx<'tcx> { local_names, address_taken_locals: visitor.address_taken_locals, name, + interned_readable_name: (&readable_name).into(), readable_name, temp_var_counter: 0, } @@ -122,6 +139,10 @@ impl<'tcx> CurrentFnCtx<'tcx> { &self.readable_name } + pub fn interned_readable_name(&self) -> InternedString { + self.interned_readable_name + } + pub fn locals(&self) -> &[LocalDecl] { &self.locals } diff --git a/kani-compiler/src/codegen_cprover_gotoc/mod.rs b/kani-compiler/src/codegen_cprover_gotoc/mod.rs index 7454d748d660..15d15c6b863e 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/mod.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/mod.rs @@ -6,6 +6,7 @@ mod context; mod overrides; mod utils; +pub use codegen::cache::clear_codegen_cache; pub use compiler_interface::{GotocCodegenBackend, UnsupportedConstructs}; pub use context::GotocCtx; pub use context::VtableCtx;