From 964f6c65a965e60f0c4be4a5efefdf5193e76ed1 Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Wed, 25 Feb 2026 23:06:45 -0500 Subject: [PATCH 01/10] init commit --- .gitignore | 4 + src/lind-boot/.cargo/config.toml | 2 + src/lind-boot/Cargo.toml | 7 + src/lind-boot/src/cli.rs | 10 ++ src/lind-boot/src/lind_wasmtime/trampoline.rs | 10 ++ src/lind-boot/src/main.rs | 45 ++++++ src/lind-boot/src/perf.rs | 57 +++++++ src/lind-perf/Cargo.lock | 7 + src/lind-perf/Cargo.toml | 6 + src/lind-perf/README.md | 112 ++++++++++++++ src/lind-perf/src/counter.rs | 142 ++++++++++++++++++ src/lind-perf/src/lib.rs | 7 + src/lind-perf/src/report.rs | 74 +++++++++ src/lind-perf/src/timers.rs | 103 +++++++++++++ src/rawposix/Cargo.toml | 2 + src/threei/Cargo.toml | 6 +- src/wasmtime/crates/lind-common/Cargo.toml | 2 + src/wasmtime/crates/lind-perf | 1 + 18 files changed, 595 insertions(+), 2 deletions(-) create mode 100644 src/lind-boot/.cargo/config.toml create mode 100644 src/lind-boot/src/perf.rs create mode 100644 src/lind-perf/Cargo.lock create mode 100644 src/lind-perf/Cargo.toml create mode 100644 src/lind-perf/README.md create mode 100644 src/lind-perf/src/counter.rs create mode 100644 src/lind-perf/src/lib.rs create mode 100644 src/lind-perf/src/report.rs create mode 100644 src/lind-perf/src/timers.rs create mode 120000 src/wasmtime/crates/lind-perf diff --git a/.gitignore b/.gitignore index 7b33f3b54..e36ac5b52 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,7 @@ scripts/object_lists_final/ e2e_status report.html results.json + +#Ignore local files +.DS_Store +target/ diff --git a/src/lind-boot/.cargo/config.toml b/src/lind-boot/.cargo/config.toml new file mode 100644 index 000000000..00deea9ff --- /dev/null +++ b/src/lind-boot/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +features = ["lind_perf"] diff --git a/src/lind-boot/Cargo.toml b/src/lind-boot/Cargo.toml index 3653c017f..ad7b40cab 100644 --- a/src/lind-boot/Cargo.toml +++ b/src/lind-boot/Cargo.toml @@ -7,6 +7,12 
@@ edition = "2024" disable_signals = ["cage/disable_signals", "wasmtime-lind-multi-process/disable_signals"] secure = ["typemap/secure"] lind_debug = ["wasmtime-lind-common/lind_debug"] +lind_perf = [ + "dep:lind-perf", + "threei/lind_perf", + "rawposix/lind_perf", + "wasmtime-lind-common/lind_perf", +] [dependencies] wasmtime-lind-common = { path = "../wasmtime/crates/lind-common" } @@ -22,6 +28,7 @@ wasmtime-lind-3i = { path = "../wasmtime/crates/lind-3i" } wasmtime = { path = "../wasmtime/crates/wasmtime", features = ["cranelift", "pooling-allocator", "gc", "threads", "demangle", "addr2line", "cache"], default-features = false } wasmtime-wasi-threads = { path = "../wasmtime/crates/wasi-threads" } wasmtime-wasi = { version = "23.0.0", features = ["preview1"] , default-features = false } +lind-perf = { path = "../wasmtime/crates/lind-perf", optional = true } wiggle = { version = "23.0.0", default-features = false } anyhow = { version = "1.0.66", default-features = false } diff --git a/src/lind-boot/src/cli.rs b/src/lind-boot/src/cli.rs index 68df0d289..d41dde20f 100644 --- a/src/lind-boot/src/cli.rs +++ b/src/lind-boot/src/cli.rs @@ -39,6 +39,16 @@ pub struct CliOptions { /// cause the environment variable `FOO` to be inherited. 
#[arg(long = "env", number_of_values = 1, value_name = "NAME[=VAL]", value_parser = parse_env_var)] pub vars: Vec<(String, Option)>, + + /// Run performance benchmark with CLOCK_GETTIME (requires the `lind_perf` feature) + #[cfg(feature = "lind_perf")] + #[arg(long)] + pub perf: bool, + + /// Run performance benchmarks with TSC (requires the `lind_perf` feature) + #[cfg(feature = "lind_perf")] + #[arg(long)] + pub perftsc: bool, } pub fn parse_env_var(s: &str) -> Result<(String, Option), String> { diff --git a/src/lind-boot/src/lind_wasmtime/trampoline.rs b/src/lind-boot/src/lind_wasmtime/trampoline.rs index 214b8b96f..65037c8ac 100644 --- a/src/lind-boot/src/lind_wasmtime/trampoline.rs +++ b/src/lind-boot/src/lind_wasmtime/trampoline.rs @@ -6,6 +6,9 @@ use wasmtime::{Caller, Instance}; use wasmtime_lind_3i::{VmCtxWrapper, get_vmctx, set_vmctx}; use wasmtime_lind_multi_process; +#[cfg(feature = "lind_perf")] +use crate::perf; + /// The callback function registered with 3i uses a unified Wasm entry /// function as the single re-entry point into the Wasm executable. 
/// @@ -45,12 +48,19 @@ pub extern "C" fn grate_callback_trampoline( arg6: u64, arg6cageid: u64, ) -> i32 { + #[cfg(feature = "lind_perf")] + let _grate_callback_timer = perf::enabled::GRATE_CALLBACK_TRAMPOLINE.get_timer(); + + + #[cfg(feature = "lind_perf")] + let _vmctx_timer = perf::enabled::TRAMPOLINE_GET_VMCTX.get_timer(); let vmctx_wrapper: VmCtxWrapper = match get_vmctx(cageid) { Some(v) => v, None => { panic!("no VMContext found for cage_id {}", cageid); } }; + drop(_vmctx_timer); // Convert back to VMContext let opaque: *mut VMOpaqueContext = vmctx_wrapper.as_ptr() as *mut VMOpaqueContext; diff --git a/src/lind-boot/src/main.rs b/src/lind-boot/src/main.rs index 145ac20cb..774534745 100644 --- a/src/lind-boot/src/main.rs +++ b/src/lind-boot/src/main.rs @@ -1,5 +1,6 @@ mod cli; mod lind_wasmtime; +mod perf; use crate::{ cli::CliOptions, @@ -8,6 +9,9 @@ use crate::{ use clap::Parser; use rawposix::init::{rawposix_shutdown, rawposix_start}; +#[cfg(feature = "lind_perf")] +use lind_perf::TimerKind; + /// Entry point of the lind-boot executable. /// /// The expected invocation follows: the first non-flag argument specifies the @@ -21,6 +25,47 @@ use rawposix::init::{rawposix_shutdown, rawposix_start}; fn main() -> Result<(), Box> { let lindboot_cli = CliOptions::parse(); + // Entry point for a lind_perf enabled build. + // + // When run with --perf flags, it performs the required setup and teardown, along with running + // the inputted wasm benchmark multiple times (once per counter). + #[cfg(feature = "lind_perf")] + { + // Determine which timer to use. --perftsc => Rdtsc, --perf => Clock + let kind = if lindboot_cli.perftsc { + Some(TimerKind::Rdtsc) + } else if lindboot_cli.perf { + Some(TimerKind::Clock) + } else { + None + }; + + match kind { + Some(k) => { + // Initiate all counters + perf::enabled::init(k); + + // Iterate over all counters, enable one at a time, run the wasm module. 
+ for name in perf::enabled::all_counter_names() { + perf::enabled::enable_one(name); + + rawposix_start(0); + + let _ = execute_wasmtime(lindboot_cli.clone()); + + rawposix_shutdown(); + } + + // Print the final report. + perf::enabled::report(); + + return Ok(()); + } + // In case neither --perf flag is set, fall back to default lind-boot behaviour. + None => {} + }; + } + // AOT-compile only — no runtime needed if lindboot_cli.precompile { precompile_module(&lindboot_cli)?; diff --git a/src/lind-boot/src/perf.rs b/src/lind-boot/src/perf.rs new file mode 100644 index 000000000..680254a9e --- /dev/null +++ b/src/lind-boot/src/perf.rs @@ -0,0 +1,57 @@ +/// lind-boot's perf file binds together every other module's perf file. +/// +/// This involves: +/// - Reading their COUNTERS +/// - Initializing them +/// - Combining all the COUNTERS into one list to iterate over and sequentially enable +/// - Printing a combined lind-perf report. +#[cfg(feature = "lind_perf")] +pub mod enabled { + use lind_perf::{Counter, TimerKind, enable_name, reset_all, set_timer}; + + // These are counters defined within lind-boot. + pub static GRATE_CALLBACK_TRAMPOLINE: Counter = + Counter::new("lind_boot::grate_callback_trampoline"); + pub static TRAMPOLINE_GET_VMCTX: Counter = Counter::new("lind_boot::trampoline::get_vmctx"); + pub static TRAMPOLINE_GET_PASS_FPTR_TO_WT: Counter = + Counter::new("lind_boot::trampoline::get_pass_fptr_to_wt"); + pub static TRAMPOLINE_TYPED_DISPATCH_CALL: Counter = + Counter::new("lind_boot::trampoline::typed_dispatch_call"); + + pub static LIND_BOOT_COUNTERS: &[&Counter] = &[ + &GRATE_CALLBACK_TRAMPOLINE, + &TRAMPOLINE_GET_VMCTX, + &TRAMPOLINE_GET_PASS_FPTR_TO_WT, + &TRAMPOLINE_TYPED_DISPATCH_CALL, + ]; + + /// Initialize counters for all modules, involves setting the TimerKind and resetting the + /// counts. 
+ pub fn init(kind: TimerKind) { + set_timer(LIND_BOOT_COUNTERS, kind); + + reset_all(LIND_BOOT_COUNTERS); + } + + /// Finds a counter by it's name and searches for it across modules to enable it. Disables all + /// other counters. + pub fn enable_one(name: &str) { + enable_name(LIND_BOOT_COUNTERS, name); + } + + /// Get a list of all counter names. + pub fn all_counter_names() -> Vec<&'static str> { + let mut names = Vec::new(); + names.extend(LIND_BOOT_COUNTERS.iter().map(|c| c.name)); + names + } + + /// Print a report for every module + pub fn report() { + lind_perf::report_header(format!("LIND-BOOT")); + lind_perf::report(LIND_BOOT_COUNTERS); + } +} + +#[cfg(not(feature = "lind_perf"))] +pub mod enabled {} diff --git a/src/lind-perf/Cargo.lock b/src/lind-perf/Cargo.lock new file mode 100644 index 000000000..60e808a28 --- /dev/null +++ b/src/lind-perf/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "lind-perf" +version = "0.1.0" diff --git a/src/lind-perf/Cargo.toml b/src/lind-perf/Cargo.toml new file mode 100644 index 000000000..990476486 --- /dev/null +++ b/src/lind-perf/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "lind-perf" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/src/lind-perf/README.md b/src/lind-perf/README.md new file mode 100644 index 000000000..1b0ac9376 --- /dev/null +++ b/src/lind-perf/README.md @@ -0,0 +1,112 @@ +# lind-perf + +`lind-perf` is a microbenchmarking library for lind-wasm. It generates timing reports for hot +paths in the syscall lifecycle by measuring the total time spent in specific functions across +modules. + +Sample output for running `close(-1)`: + +```bash +FDTABLE Test ................ 
+--------------------------------------------LIND-BOOT-------------------------------------------- +name calls total avg +------------------------------------------------------------------------------------------------- +lind_boot::load_main_module 1 111.482ms 111.482ms +lind_boot::invoke_func 1 111.282ms 111.282ms + +-------------------------------------------LIND-COMMON------------------------------------------- +name calls total avg +------------------------------------------------------------------------------------------------- +lind_common::add_to_linker::make-syscall 1000000 94.274ms 94.000ns + +---------------------------------------------THREEI---------------------------------------------- +name calls total avg +------------------------------------------------------------------------------------------------- +threei::make_syscall 1000000 90.815ms 90.000ns + +--------------------------------------------RAWPOSIX--------------------------------------------- +name calls total avg +------------------------------------------------------------------------------------------------- +rawposix::close_syscall 1000000 21.255ms 21.000ns + +--------------------------------------------FDTABLES--------------------------------------------- +name calls total avg +------------------------------------------------------------------------------------------------- +fdtables::close_virtualfd 1000000 14.372ms 14.000ns +``` + +## Building + +`lind-perf` is only included in the final binary if `--features lind_perf` is set during build. + +`make lind-boot-perf` is a shorthand for building a `release` version of `lind-boot` with `lind-perf` enabled. + +## Running Benchmarks + +`lind-perf` will generate a report for any module that is run using `lind-boot` with the +`--perf` or `--perftsc` flag. + +e.g. 
`sudo lind-boot --perf libc_syscall.wasm` + +Standard benchmarks can be run using: [`./scripts/run_microbench.sh`](../../scripts/run_microbench.sh) + +Flags: +- `--perf`: Uses the default Clock timer (nanoseconds) +- `--perftsc`: Uses the `rdtsc` timer (CPU cycles) + +## Internals + +### How the timer works +Each benchmark site is a `Counter`. A counter tracks: +- total elapsed time across calls +- number of calls + +Timing is scoped. The common pattern is: +1. Create a guard at the start of the function. +2. The guard records the start time immediately. +3. When the function returns, the guard is dropped and records the end time. +4. The elapsed time is added to the counter total and the call count increments. + +This means early returns are timed as well. If the guard is dropped before the work +finishes (e.g., because of a `return foo(...)` expression), the measurement will be too +small. Keep the guard alive until after the work: + +```rust +let _scope = perf::enabled::YOUR_COUNTER.get_timer(); +let ret = (|| { + // measured work + ... +})(); +std::hint::black_box(&_scope); // Tells Rust to be pessimistic about optimizing this variable. +ret +``` + +### Ensuring only one active timer +`lind-boot` runs the benchmark module once per counter. On each run it enables exactly one +counter and disables the rest, then prints a report. This avoids stacked measurement overhead +from multiple counters running at the same time. + +The logic for this can be seen in [`lind-boot/src/main.rs`](../lind-boot/src/main.rs) + +### Adding a new benchmark site +Suppose we want to add a new timer in `threei` for the `copy_data_between_cages` function. We will need to make the following changes: + +1. Add a counter in `src/threei/src/perf.rs` and include it in `ALL_COUNTERS`. +2. Add a scoped timer in `src/threei/src/threei.rs` at the top of the `copy_data_between_cages` function. +3. Keep the guard alive until after the measured work if the function has multiple return paths. 
This can be done by moving measured work into an unnamed scope, and using the `std::hint::black_box` to avoid the scope being optimized out early. + +In case we want to only benchmark a snippet of a function instead of the entire thing, we can `drop` the scope manually: + +```rust +let scope = perf::enabled::YOUR_COUNTER.get_timer(); +// measured snippet +drop(scope); +``` + +### Adding a new crate +Currently the crates that are supported are `wasmtime_lind_common`, `fdtables`, `rawposix`, and `threei`. In order to add support for a new crate, the following changes are needed: + +1. Add a `perf.rs` module to the new crate and define counters plus `ALL_COUNTERS`. +2. Export `ALL_COUNTERS` from the crate's `perf` module. +3. Add the crate's counters to `lind-boot` enumeration, enable/reset, and reporting. +4. Rebuild `lind-boot` with `--features lind_perf` to include the new module. diff --git a/src/lind-perf/src/counter.rs b/src/lind-perf/src/counter.rs new file mode 100644 index 000000000..a4598e71d --- /dev/null +++ b/src/lind-perf/src/counter.rs @@ -0,0 +1,142 @@ +use crate::timers::{TimerKind, default_timer_kind, read_end, read_start}; +use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; + +/// Counter stores information pertaining to a specific benchmarking site. +/// +/// Typically declared as `static` and imported in lind-boot. +pub struct Counter { + /// Counts the total number of CPU cycles or Nanoseconds spent. + pub cycles: AtomicU64, + /// Counts the total number of invocations. + pub calls: AtomicU64, + pub name: &'static str, + /// Only one Counter is globally enabled during a given run. + pub enabled: AtomicBool, + /// Stores TimerKind + timer: AtomicU8, +} + +impl Counter { + /// Create a counter with the default timer. + /// + /// Use this for most counters; change the timer only when you need cycles. 
+ pub const fn new(name: &'static str) -> Self { + Self { + cycles: AtomicU64::new(0), + calls: AtomicU64::new(0), + name, + enabled: AtomicBool::new(false), + timer: AtomicU8::new(default_timer_kind() as u8), + } + } + + #[inline(always)] + /// Start a measurement for this counter. + /// + /// Returns `0` if the counter is disabled. + pub fn start(&self) -> u64 { + if self.enabled.load(Ordering::Relaxed) { + read_start(self.timer_kind()) + } else { + 0 + } + } + + #[inline(always)] + /// Record a measurement using the start timestamp. + /// + /// This is a no-op when the counter is disabled. + pub fn record(&self, start: u64) { + if self.enabled.load(Ordering::Relaxed) { + let elapsed = read_end(self.timer_kind()).saturating_sub(start); + // Add elapsed time to the counter. + self.cycles.fetch_add(elapsed, Ordering::Relaxed); + // Increment total calls. + self.calls.fetch_add(1, Ordering::Relaxed); + } + } + + #[inline(always)] + /// Create an RAII scope guard that records on drop. + pub fn get_timer(&self) -> Scope<'_> { + Scope { + counter: self, + start: self.start(), + } + } + + /// Enable this counter. + pub fn enable(&self) { + self.enabled.store(true, Ordering::Relaxed); + } + + /// Disable this counter. + pub fn disable(&self) { + self.enabled.store(false, Ordering::Relaxed); + } + + /// Reset totals for this counter. + pub fn reset(&self) { + self.cycles.store(0, Ordering::Relaxed); + self.calls.store(0, Ordering::Relaxed); + } + + /// Set the timer backend for this counter. + /// + /// This does not reset totals. + pub fn set_timer_kind(&self, kind: TimerKind) { + self.timer.store(kind as u8, Ordering::Relaxed); + } + + /// Read the current timer backend. + pub fn timer_kind(&self) -> TimerKind { + match self.timer.load(Ordering::Relaxed) { + 0 => TimerKind::Rdtsc, + _ => TimerKind::Clock, + } + } +} + +/// Scope is the implementation of the RAII guard which stores a Counter and the start time (when +/// it was introduced). 
+/// +/// Upon drop, it records for the Counter the total time elapsed. +pub struct Scope<'a> { + counter: &'a Counter, + start: u64, +} + +impl Drop for Scope<'_> { + fn drop(&mut self) { + self.counter.record(self.start); + } +} + +/// Reset all counters in a group. +pub fn reset_all(counters: &[&Counter]) { + for c in counters { + c.reset(); + } +} + +/// Set a timer for a counter group. +/// +/// This updates the backend for all counters in the slice. +pub fn set_timer(counters: &[&Counter], kind: TimerKind) { + for c in counters { + c.set_timer_kind(kind); + } +} + +/// Enable only the named counter in a group. +/// +/// All other counters in the slice are disabled. +pub fn enable_name(counters: &[&Counter], name: &str) { + for c in counters { + if c.name == name { + c.enable(); + } else { + c.disable(); + } + } +} diff --git a/src/lind-perf/src/lib.rs b/src/lind-perf/src/lib.rs new file mode 100644 index 000000000..d02bd71b9 --- /dev/null +++ b/src/lind-perf/src/lib.rs @@ -0,0 +1,7 @@ +mod counter; +mod report; +mod timers; + +pub use counter::*; +pub use report::*; +pub use timers::*; diff --git a/src/lind-perf/src/report.rs b/src/lind-perf/src/report.rs new file mode 100644 index 000000000..449be5eca --- /dev/null +++ b/src/lind-perf/src/report.rs @@ -0,0 +1,74 @@ +use crate::counter::Counter; +use crate::timers::{PrettyDuration, TimerKind}; +use std::sync::atomic::Ordering; +use std::time::Duration; + +/// Print a section header. +pub fn report_header(header: String) { + let pad = "-"; + let total = 97 - header.len(); + let left = total / 2; + let right = total - left; + + println!("\n{}{}{}", pad.repeat(left), header, pad.repeat(right),); +} + +/// Print a report for a counter group. +/// +/// The report is sorted by definition order, not by cost. 
+pub fn report(counters: &[&Counter]) { + // Tunable constants + const NAME_W: usize = 60; + const CALLS_W: usize = 10; + const NUM_W: usize = 12; + + let mut rows: Vec = Vec::new(); + + for c in counters { + let calls = c.calls.load(Ordering::Relaxed); + if calls == 0 { + continue; + } + + let cycles = match c.timer_kind() { + TimerKind::Rdtsc => format!("{:#?}", c.cycles.load(Ordering::Relaxed)), + TimerKind::Clock => format!( + "{}", + PrettyDuration(Duration::from_nanos(c.cycles.load(Ordering::Relaxed))) + ), + }; + + let avg = match c.timer_kind() { + TimerKind::Rdtsc => format!("{:#?}", c.cycles.load(Ordering::Relaxed) / calls), + TimerKind::Clock => format!( + "{}", + PrettyDuration(Duration::from_nanos( + c.cycles.load(Ordering::Relaxed) / calls + )) + ), + }; + + // {:CALLS_W$} {:>NUM_W$} {:>NUM_W$}", + c.name, calls, cycles, avg, + )); + } + + if rows.len() == 0 { + return; + } + + eprintln!( + "{:CALLS_W$} {:>NUM_W$} {:>NUM_W$}", + "name", "calls", "total", "avg", + ); + + eprintln!("{}", "-".repeat(NAME_W + CALLS_W + NUM_W * 2 + 3)); + + for i in rows { + eprintln!("{}", i); + } + + println!(""); +} diff --git a/src/lind-perf/src/timers.rs b/src/lind-perf/src/timers.rs new file mode 100644 index 000000000..5b6a91935 --- /dev/null +++ b/src/lind-perf/src/timers.rs @@ -0,0 +1,103 @@ +use std::time::Duration; + +/// Formats nanosecond totals for reports. Converts nanosecond input to larger units where +/// appropriate and truncates to 3 decimal points. 
+pub struct PrettyDuration(pub Duration); + +impl std::fmt::Display for PrettyDuration { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let ns_f = self.0.as_nanos() as f64; + + let format = if ns_f < 1_000.0 { + format!("{:.3}ns", ns_f) + } else if ns_f < 1_000_000.0 { + format!("{:.3}µs", ns_f / 1_000.0) + } else if ns_f < 1_000_000_000.0 { + format!("{:.3}ms", ns_f / 1_000_000.0) + } else { + format!("{:.3}s", ns_f / 1_000_000_000.0) + }; + + write!(f, "{}", format) + } +} + +/// TimerKind defines the timer-backend to be used for benchmarks. We support two kinds of timers +/// currently, +/// +/// RDTSC: Time Stamp Counter that counts the number of CPU cycles that have elapsed. +/// Clock: Uses CLOCK_MONOTONIC_RAW to get the current time in nanoseconds. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum TimerKind { + Rdtsc = 0, + Clock = 1, +} + +/// Get the default timer. +pub const fn default_timer_kind() -> TimerKind { + TimerKind::Clock +} + +/// Public functions to record start and end times depending on the TimerKind being used. +#[inline(always)] +pub fn read_start(kind: TimerKind) -> u64 { + match kind { + TimerKind::Rdtsc => rdtsc_start(), + TimerKind::Clock => clock_now(), + } +} + +#[inline(always)] +pub fn read_end(kind: TimerKind) -> u64 { + match kind { + TimerKind::Rdtsc => rdtsc_end(), + TimerKind::Clock => clock_now(), + } +} + +#[inline(always)] +fn rdtsc_start() -> u64 { + // RDTSC is only available of x864 machines. + // In case this API is not exposed, default back to Clock. + #[cfg(target_arch = "x86_64")] + unsafe { + // From Intel's documentation : + // + // Perform a serializing operation on all load-from-memory instructions that were + // issued prior to this instruction. Guarantees that every load instruction that precedes, + // in program order, is globally visible before any load instruction which follows + // the fence in program order. 
+ core::arch::x86_64::_mm_lfence(); + return core::arch::x86_64::_rdtsc(); + } + return clock_now(); +} + +#[inline(always)] +fn rdtsc_end() -> u64 { + #[cfg(target_arch = "x86_64")] + unsafe { + // End the TSC timer. + let mut aux = 0u32; + let tsc = core::arch::x86_64::__rdtscp(&mut aux); + // End the load fence. + core::arch::x86_64::_mm_lfence(); + return tsc; + } + return clock_now(); +} + +#[inline(always)] +fn clock_now() -> u64 { + let mut ts = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + let rc = unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC_RAW, &mut ts) }; + if rc != 0 { + panic!("Unable to get a CLOCK_MONOTONIC_RAW time. Aborting benchmarks."); + } + return (ts.tv_sec as u64) + .saturating_mul(1_000_000_000) + .saturating_add(ts.tv_nsec as u64); +} diff --git a/src/rawposix/Cargo.toml b/src/rawposix/Cargo.toml index 27cc71a42..9e603cf3d 100644 --- a/src/rawposix/Cargo.toml +++ b/src/rawposix/Cargo.toml @@ -16,10 +16,12 @@ sysdefs = { path = "../sysdefs" } typemap = { path = "../typemap" } cage = { path = "../cage" } threei = { path = "../threei" } +lind-perf = { path = "../lind-perf", optional=true } [features] default = ["fast"] fast = [] secure = [] +lind_perf = ["dep:lind-perf"] [dev-dependencies] diff --git a/src/threei/Cargo.toml b/src/threei/Cargo.toml index e3f838b13..e88d65142 100644 --- a/src/threei/Cargo.toml +++ b/src/threei/Cargo.toml @@ -13,11 +13,13 @@ once_cell = "1.18" lazy_static = "1.4" parking_lot = "0.12" nodit = "0.9.2" # Used for VMMAP +lind-perf = { path = "../lind-perf", optional = true } [features] -default = ["hashmap"] +default = ["hashmap"] hashmap = [] -dashmap = [] +dashmap = [] +lind_perf = ["dep:lind-perf"] [dev-dependencies] serial_test = "3" diff --git a/src/wasmtime/crates/lind-common/Cargo.toml b/src/wasmtime/crates/lind-common/Cargo.toml index bd95c8e3b..ae0b20bfc 100644 --- a/src/wasmtime/crates/lind-common/Cargo.toml +++ b/src/wasmtime/crates/lind-common/Cargo.toml @@ -22,6 +22,8 @@ sysdefs = { path = 
"../sysdefs" } wasmtime-lind-3i = { path = "../lind-3i" } cage = { path = "../cage" } typemap = { path = "../typemap" } +lind-perf = { path = "../lind-perf", optional = true } [features] lind_debug = [] +lind_perf = ["dep:lind-perf", "threei/lind_perf", "rawposix/lind_perf"] diff --git a/src/wasmtime/crates/lind-perf b/src/wasmtime/crates/lind-perf new file mode 120000 index 000000000..fd055b135 --- /dev/null +++ b/src/wasmtime/crates/lind-perf @@ -0,0 +1 @@ +../../lind-perf \ No newline at end of file From ce2d32453cd3161925e68591201d081e663f412f Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Thu, 26 Feb 2026 16:58:40 -0500 Subject: [PATCH 02/10] lind-perf initial commit - Add lind-perf library - Patch --features lind_perf to lind-boot, rawposix, threei, wasmtime-lind-common - Add basic timers in lind-boot::trampoline - Add --perf=[] UI flag with proper handling --- src/lind-boot/Cargo.toml | 4 +- src/lind-boot/src/cli.rs | 44 +++++-- src/lind-boot/src/lind_wasmtime/execute.rs | 6 +- src/lind-boot/src/lind_wasmtime/trampoline.rs | 10 +- src/lind-boot/src/main.rs | 56 +++------ src/lind-boot/src/perf.rs | 72 +++++------- src/lind-perf/Cargo.lock | 9 ++ src/lind-perf/Cargo.toml | 5 + src/lind-perf/src/disabled.rs | 111 ++++++++++++++++++ src/lind-perf/src/{ => enabled}/counter.rs | 25 ++-- src/lind-perf/src/enabled/mod.rs | 7 ++ src/lind-perf/src/enabled/report.rs | 90 ++++++++++++++ src/lind-perf/src/enabled/timers.rs | 65 ++++++++++ src/lind-perf/src/lib.rs | 24 +++- src/lind-perf/src/report.rs | 7 ++ src/lind-perf/src/timers.rs | 103 ---------------- src/rawposix/Cargo.toml | 4 +- src/threei/Cargo.toml | 4 +- src/wasmtime/crates/lind-common/Cargo.toml | 4 +- 19 files changed, 412 insertions(+), 238 deletions(-) create mode 100644 src/lind-perf/src/disabled.rs rename src/lind-perf/src/{ => enabled}/counter.rs (82%) create mode 100644 src/lind-perf/src/enabled/mod.rs create mode 100644 src/lind-perf/src/enabled/report.rs create mode 100644 
src/lind-perf/src/enabled/timers.rs delete mode 100644 src/lind-perf/src/timers.rs diff --git a/src/lind-boot/Cargo.toml b/src/lind-boot/Cargo.toml index ad7b40cab..58f071d81 100644 --- a/src/lind-boot/Cargo.toml +++ b/src/lind-boot/Cargo.toml @@ -8,7 +8,7 @@ disable_signals = ["cage/disable_signals", "wasmtime-lind-multi-process/disable_ secure = ["typemap/secure"] lind_debug = ["wasmtime-lind-common/lind_debug"] lind_perf = [ - "dep:lind-perf", + "lind-perf/enabled", "threei/lind_perf", "rawposix/lind_perf", "wasmtime-lind-common/lind_perf", @@ -28,7 +28,7 @@ wasmtime-lind-3i = { path = "../wasmtime/crates/lind-3i" } wasmtime = { path = "../wasmtime/crates/wasmtime", features = ["cranelift", "pooling-allocator", "gc", "threads", "demangle", "addr2line", "cache"], default-features = false } wasmtime-wasi-threads = { path = "../wasmtime/crates/wasi-threads" } wasmtime-wasi = { version = "23.0.0", features = ["preview1"] , default-features = false } -lind-perf = { path = "../wasmtime/crates/lind-perf", optional = true } +lind-perf = { path = "../wasmtime/crates/lind-perf" } wiggle = { version = "23.0.0", default-features = false } anyhow = { version = "1.0.66", default-features = false } diff --git a/src/lind-boot/src/cli.rs b/src/lind-boot/src/cli.rs index d41dde20f..58aec37a7 100644 --- a/src/lind-boot/src/cli.rs +++ b/src/lind-boot/src/cli.rs @@ -1,5 +1,11 @@ use clap::*; +#[derive(Debug, Clone, Copy, ValueEnum)] +pub enum PerfTimer { + Clock, + Tsc, +} + #[derive(Debug, Parser, Clone)] #[command(name = "lind-boot")] pub struct CliOptions { @@ -40,15 +46,18 @@ pub struct CliOptions { #[arg(long = "env", number_of_values = 1, value_name = "NAME[=VAL]", value_parser = parse_env_var)] pub vars: Vec<(String, Option)>, - /// Run performance benchmark with CLOCK_GETTIME (requires the `lind_perf` feature) - #[cfg(feature = "lind_perf")] - #[arg(long)] - pub perf: bool, - - /// Run performance benchmarks with TSC (requires the `lind_perf` feature) - #[cfg(feature = 
"lind_perf")] - #[arg(long)] - pub perftsc: bool, + /// Get performance information for the running module. + /// + /// Requires compilation with `lind_perf` feature. + #[arg( + long, + value_enum, + default_missing_value = "clock", + value_name = "clock|tsc", + num_args = 0..=1, + require_equals = true, + )] + pub perf: Option, } pub fn parse_env_var(s: &str) -> Result<(String, Option), String> { @@ -63,4 +72,21 @@ impl CliOptions { pub fn wasm_file(&self) -> &str { &self.args[0] } + + pub fn perf_timer_kind(&self) -> Option { + match lind_perf::ENABLED { + false => match self.perf { + Some(_) => { + eprintln!("--perf needs compilation with the feature `lind_perf` enabled."); + std::process::exit(1); + } + None => None, + }, + true => match self.perf { + Some(PerfTimer::Clock) => Some(lind_perf::TimerKind::Clock), + Some(PerfTimer::Tsc) => Some(lind_perf::TimerKind::Rdtsc), + None => None, + }, + } + } } diff --git a/src/lind-boot/src/lind_wasmtime/execute.rs b/src/lind-boot/src/lind_wasmtime/execute.rs index 1473a1358..736d5154d 100644 --- a/src/lind-boot/src/lind_wasmtime/execute.rs +++ b/src/lind-boot/src/lind_wasmtime/execute.rs @@ -1,5 +1,5 @@ use crate::{cli::CliOptions, lind_wasmtime::host::HostCtx, lind_wasmtime::trampoline::*}; -use anyhow::{Context, Result, anyhow, bail}; +use anyhow::{anyhow, bail, Context, Result}; use cage::signal::{lind_signal_init, signal_may_trigger}; use cfg_if::cfg_if; use std::ffi::c_void; @@ -13,8 +13,8 @@ use wasmtime::{ AsContextMut, Engine, Func, InstantiateType, Linker, Module, Precompiled, Store, Val, ValType, WasmBacktraceDetails, }; -use wasmtime_lind_3i::{VmCtxWrapper, init_vmctx_pool, rm_vmctx, set_vmctx, set_vmctx_thread}; -use wasmtime_lind_multi_process::{CAGE_START_ID, LindCtx, THREAD_START_ID}; +use wasmtime_lind_3i::{init_vmctx_pool, rm_vmctx, set_vmctx, set_vmctx_thread, VmCtxWrapper}; +use wasmtime_lind_multi_process::{LindCtx, CAGE_START_ID, THREAD_START_ID}; use wasmtime_lind_utils::LindCageManager; use 
wasmtime_wasi_threads::WasiThreadsCtx; diff --git a/src/lind-boot/src/lind_wasmtime/trampoline.rs b/src/lind-boot/src/lind_wasmtime/trampoline.rs index 65037c8ac..35b985c59 100644 --- a/src/lind-boot/src/lind_wasmtime/trampoline.rs +++ b/src/lind-boot/src/lind_wasmtime/trampoline.rs @@ -6,7 +6,6 @@ use wasmtime::{Caller, Instance}; use wasmtime_lind_3i::{VmCtxWrapper, get_vmctx, set_vmctx}; use wasmtime_lind_multi_process; -#[cfg(feature = "lind_perf")] use crate::perf; /// The callback function registered with 3i uses a unified Wasm entry @@ -48,19 +47,16 @@ pub extern "C" fn grate_callback_trampoline( arg6: u64, arg6cageid: u64, ) -> i32 { - #[cfg(feature = "lind_perf")] - let _grate_callback_timer = perf::enabled::GRATE_CALLBACK_TRAMPOLINE.get_timer(); + let _grate_callback_timer = lind_perf::get_timer!(perf::GRATE_CALLBACK_TRAMPOLINE); - - #[cfg(feature = "lind_perf")] - let _vmctx_timer = perf::enabled::TRAMPOLINE_GET_VMCTX.get_timer(); + let vmctx_timer = lind_perf::get_timer!(perf::TRAMPOLINE_GET_VMCTX); let vmctx_wrapper: VmCtxWrapper = match get_vmctx(cageid) { Some(v) => v, None => { panic!("no VMContext found for cage_id {}", cageid); } }; - drop(_vmctx_timer); + drop(vmctx_timer); // Convert back to VMContext let opaque: *mut VMOpaqueContext = vmctx_wrapper.as_ptr() as *mut VMOpaqueContext; diff --git a/src/lind-boot/src/main.rs b/src/lind-boot/src/main.rs index 774534745..956f043a9 100644 --- a/src/lind-boot/src/main.rs +++ b/src/lind-boot/src/main.rs @@ -9,9 +9,6 @@ use crate::{ use clap::Parser; use rawposix::init::{rawposix_shutdown, rawposix_start}; -#[cfg(feature = "lind_perf")] -use lind_perf::TimerKind; - /// Entry point of the lind-boot executable. /// /// The expected invocation follows: the first non-flag argument specifies the @@ -25,50 +22,27 @@ use lind_perf::TimerKind; fn main() -> Result<(), Box> { let lindboot_cli = CliOptions::parse(); - // Entry point for a lind_perf enabled build. 
- // - // When run with --perf flags, it performs the required setup and teardown, along with running - // the inputted wasm benchmark multiple times (once per counter). - #[cfg(feature = "lind_perf")] - { - // Determine which timer to use. --perftsc => Rdtsc, --perf => Clock - let kind = if lindboot_cli.perftsc { - Some(TimerKind::Rdtsc) - } else if lindboot_cli.perf { - Some(TimerKind::Clock) - } else { - None - }; - - match kind { - Some(k) => { - // Initiate all counters - perf::enabled::init(k); - - // Iterate over all counters, enable one at a time, run the wasm module. - for name in perf::enabled::all_counter_names() { - perf::enabled::enable_one(name); + // AOT-compile only — no runtime needed + if lindboot_cli.precompile { + precompile_module(&lindboot_cli)?; + return Ok(()); + } - rawposix_start(0); + if let Some(kind) = lindboot_cli.perf_timer_kind() { + perf::perf_init(kind); - let _ = execute_wasmtime(lindboot_cli.clone()); + let counters = perf::all_counter_names(); - rawposix_shutdown(); - } + for counter in counters { + perf::enable_one_counter(counter); - // Print the final report. - perf::enabled::report(); + rawposix_start(0); + let _ = execute_wasmtime(lindboot_cli.clone()); + rawposix_shutdown(); + } - return Ok(()); - } - // In case neither --perf flag is set, fall back to default lind-boot behaviour. - None => {} - }; - } + perf::perf_report(); - // AOT-compile only — no runtime needed - if lindboot_cli.precompile { - precompile_module(&lindboot_cli)?; return Ok(()); } diff --git a/src/lind-boot/src/perf.rs b/src/lind-boot/src/perf.rs index 680254a9e..434d30533 100644 --- a/src/lind-boot/src/perf.rs +++ b/src/lind-boot/src/perf.rs @@ -5,53 +5,39 @@ /// - Initializing them /// - Combining all the COUNTERS into one list to iterate over and sequentially enable /// - Printing a combined lind-perf report. 
-#[cfg(feature = "lind_perf")] -pub mod enabled { - use lind_perf::{Counter, TimerKind, enable_name, reset_all, set_timer}; +use crate::cli::CliOptions; +use lind_perf::{Counter, TimerKind}; - // These are counters defined within lind-boot. - pub static GRATE_CALLBACK_TRAMPOLINE: Counter = - Counter::new("lind_boot::grate_callback_trampoline"); - pub static TRAMPOLINE_GET_VMCTX: Counter = Counter::new("lind_boot::trampoline::get_vmctx"); - pub static TRAMPOLINE_GET_PASS_FPTR_TO_WT: Counter = - Counter::new("lind_boot::trampoline::get_pass_fptr_to_wt"); - pub static TRAMPOLINE_TYPED_DISPATCH_CALL: Counter = - Counter::new("lind_boot::trampoline::typed_dispatch_call"); +// These are counters defined within lind-boot. +pub static GRATE_CALLBACK_TRAMPOLINE: Counter = + Counter::new("lind_boot::grate_callback_trampoline"); +pub static TRAMPOLINE_GET_VMCTX: Counter = Counter::new("lind_boot::trampoline::get_vmctx"); - pub static LIND_BOOT_COUNTERS: &[&Counter] = &[ - &GRATE_CALLBACK_TRAMPOLINE, - &TRAMPOLINE_GET_VMCTX, - &TRAMPOLINE_GET_PASS_FPTR_TO_WT, - &TRAMPOLINE_TYPED_DISPATCH_CALL, - ]; +pub static LIND_BOOT_COUNTERS: &[&Counter] = &[&GRATE_CALLBACK_TRAMPOLINE, &TRAMPOLINE_GET_VMCTX]; - /// Initialize counters for all modules, involves setting the TimerKind and resetting the - /// counts. - pub fn init(kind: TimerKind) { - set_timer(LIND_BOOT_COUNTERS, kind); - - reset_all(LIND_BOOT_COUNTERS); - } - - /// Finds a counter by it's name and searches for it across modules to enable it. Disables all - /// other counters. - pub fn enable_one(name: &str) { - enable_name(LIND_BOOT_COUNTERS, name); - } +/// Initialize counters for all modules, involves setting the TimerKind and resetting the +/// counts. +pub fn perf_init(kind: TimerKind) { + lind_perf::set_timer(LIND_BOOT_COUNTERS, kind); + lind_perf::reset_all(LIND_BOOT_COUNTERS); +} - /// Get a list of all counter names. 
- pub fn all_counter_names() -> Vec<&'static str> { - let mut names = Vec::new(); - names.extend(LIND_BOOT_COUNTERS.iter().map(|c| c.name)); - names - } +/// Finds a counter by it's name and searches for it across modules to enable it. Disables all +/// other counters. +pub fn enable_one_counter(name: &str) { + lind_perf::enable_name(LIND_BOOT_COUNTERS, name); +} - /// Print a report for every module - pub fn report() { - lind_perf::report_header(format!("LIND-BOOT")); - lind_perf::report(LIND_BOOT_COUNTERS); - } +/// Get a list of all counter names. +pub fn all_counter_names() -> Vec<&'static str> { + LIND_BOOT_COUNTERS + .iter() + .filter_map(|c| c.get_name()) + .collect() } -#[cfg(not(feature = "lind_perf"))] -pub mod enabled {} +/// Print a report for every module. +pub fn perf_report() { + lind_perf::report_header(format!("LIND-BOOT")); + lind_perf::report(LIND_BOOT_COUNTERS); +} diff --git a/src/lind-perf/Cargo.lock b/src/lind-perf/Cargo.lock index 60e808a28..27c6f0a05 100644 --- a/src/lind-perf/Cargo.lock +++ b/src/lind-perf/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "libc" +version = "0.2.182" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" + [[package]] name = "lind-perf" version = "0.1.0" +dependencies = [ + "libc", +] diff --git a/src/lind-perf/Cargo.toml b/src/lind-perf/Cargo.toml index 990476486..02a95ab80 100644 --- a/src/lind-perf/Cargo.toml +++ b/src/lind-perf/Cargo.toml @@ -3,4 +3,9 @@ name = "lind-perf" version = "0.1.0" edition = "2024" +[features] +default = [] +enabled = ["dep:libc"] + [dependencies] +libc = { version = "0.2", optional = true } diff --git a/src/lind-perf/src/disabled.rs b/src/lind-perf/src/disabled.rs new file mode 100644 index 000000000..c6ba0fb68 --- /dev/null +++ b/src/lind-perf/src/disabled.rs @@ -0,0 +1,111 @@ +use std::time::Duration; + +/// Formats nanosecond totals for reports. +pub struct PrettyDuration(pub Duration); + +impl std::fmt::Display for PrettyDuration { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let ns_f = self.0.as_nanos() as f64; + + let format = if ns_f < 1_000.0 { + format!("{:.3}ns", ns_f) + } else if ns_f < 1_000_000.0 { + format!("{:.3}us", ns_f / 1_000.0) + } else if ns_f < 1_000_000_000.0 { + format!("{:.3}ms", ns_f / 1_000_000.0) + } else { + format!("{:.3}s", ns_f / 1_000_000_000.0) + }; + + write!(f, "{}", format) + } +} + +/// TimerKind exists in both enabled and disabled builds for API consistency. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum TimerKind { + Rdtsc = 0, + Clock = 1, +} + +/// Get the default timer. +pub const fn default_timer_kind() -> TimerKind { + TimerKind::Clock +} + +#[inline(always)] +pub fn read_start(_kind: TimerKind) -> u64 { + 0 +} + +#[inline(always)] +pub fn read_end(_kind: TimerKind) -> u64 { + 0 +} + +/// Lightweight no-op counter representation for disabled builds. 
+pub struct Counter; + +impl Counter { + pub const fn new(_name: &'static str) -> Self { + Self + } + + pub fn get_name(&self) -> Option<&'static str> { + None + } + + #[inline(always)] + pub fn start(&self) -> u64 { + let _ = self; + 0 + } + + #[inline(always)] + pub fn record(&self, _start: u64) { + let _ = self; + } + + #[inline(always)] + pub fn get_timer(&self) -> Scope { + let _ = self; + Scope + } + + pub fn enable(&self) { + let _ = self; + } + + pub fn disable(&self) { + let _ = self; + } + + pub fn reset(&self) { + let _ = self; + } + + pub fn set_timer_kind(&self, _kind: TimerKind) { + let _ = self; + } + + pub fn timer_kind(&self) -> TimerKind { + TimerKind::Clock + } +} + +/// No-op RAII guard for disabled builds. +pub struct Scope; + +impl Drop for Scope { + fn drop(&mut self) {} +} + +pub fn reset_all(_counters: &[&Counter]) {} + +pub fn set_timer(_counters: &[&Counter], _kind: TimerKind) {} + +pub fn enable_name(_counters: &[&Counter], _name: &str) {} + +pub fn report_header(_header: String) {} + +pub fn report(_counters: &[&Counter]) {} diff --git a/src/lind-perf/src/counter.rs b/src/lind-perf/src/enabled/counter.rs similarity index 82% rename from src/lind-perf/src/counter.rs rename to src/lind-perf/src/enabled/counter.rs index a4598e71d..a640e90b8 100644 --- a/src/lind-perf/src/counter.rs +++ b/src/lind-perf/src/enabled/counter.rs @@ -1,9 +1,7 @@ -use crate::timers::{TimerKind, default_timer_kind, read_end, read_start}; +use crate::{TimerKind, default_timer_kind, read_end, read_start}; use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; /// Counter stores information pertaining to a specific benchmarking site. -/// -/// Typically declared as `static` and imported in lind-boot. pub struct Counter { /// Counts the total number of CPU cycles or Nanoseconds spent. pub cycles: AtomicU64, @@ -12,14 +10,12 @@ pub struct Counter { pub name: &'static str, /// Only one Counter is globally enabled during a given run. 
pub enabled: AtomicBool, - /// Stores TimerKind + /// Stores TimerKind. timer: AtomicU8, } impl Counter { /// Create a counter with the default timer. - /// - /// Use this for most counters; change the timer only when you need cycles. pub const fn new(name: &'static str) -> Self { Self { cycles: AtomicU64::new(0), @@ -30,6 +26,10 @@ impl Counter { } } + pub fn get_name(&self) -> Option<&'static str> { + Some(self.name) + } + #[inline(always)] /// Start a measurement for this counter. /// @@ -49,9 +49,7 @@ impl Counter { pub fn record(&self, start: u64) { if self.enabled.load(Ordering::Relaxed) { let elapsed = read_end(self.timer_kind()).saturating_sub(start); - // Add elapsed time to the counter. self.cycles.fetch_add(elapsed, Ordering::Relaxed); - // Increment total calls. self.calls.fetch_add(1, Ordering::Relaxed); } } @@ -82,8 +80,6 @@ impl Counter { } /// Set the timer backend for this counter. - /// - /// This does not reset totals. pub fn set_timer_kind(&self, kind: TimerKind) { self.timer.store(kind as u8, Ordering::Relaxed); } @@ -97,10 +93,7 @@ impl Counter { } } -/// Scope is the implementation of the RAII guard which stores a Counter and the start time (when -/// it was introduced). -/// -/// Upon drop, it records for the Counter the total time elapsed. +/// Scope is the RAII guard that records elapsed time on drop. pub struct Scope<'a> { counter: &'a Counter, start: u64, @@ -120,8 +113,6 @@ pub fn reset_all(counters: &[&Counter]) { } /// Set a timer for a counter group. -/// -/// This updates the backend for all counters in the slice. pub fn set_timer(counters: &[&Counter], kind: TimerKind) { for c in counters { c.set_timer_kind(kind); @@ -129,8 +120,6 @@ pub fn set_timer(counters: &[&Counter], kind: TimerKind) { } /// Enable only the named counter in a group. -/// -/// All other counters in the slice are disabled. 
pub fn enable_name(counters: &[&Counter], name: &str) { for c in counters { if c.name == name { diff --git a/src/lind-perf/src/enabled/mod.rs b/src/lind-perf/src/enabled/mod.rs new file mode 100644 index 000000000..d02bd71b9 --- /dev/null +++ b/src/lind-perf/src/enabled/mod.rs @@ -0,0 +1,7 @@ +mod counter; +mod report; +mod timers; + +pub use counter::*; +pub use report::*; +pub use timers::*; diff --git a/src/lind-perf/src/enabled/report.rs b/src/lind-perf/src/enabled/report.rs new file mode 100644 index 000000000..dc0036cdb --- /dev/null +++ b/src/lind-perf/src/enabled/report.rs @@ -0,0 +1,90 @@ +use crate::{Counter, TimerKind}; +use std::sync::atomic::Ordering; +use std::time::Duration; + +/// Formats nanosecond totals for reports. Converts nanosecond input to larger units where +/// appropriate and truncates to 3 decimal points. +pub struct PrettyDuration(pub Duration); + +impl std::fmt::Display for PrettyDuration { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let ns_f = self.0.as_nanos() as f64; + + let format = if ns_f < 1_000.0 { + format!("{:.3}ns", ns_f) + } else if ns_f < 1_000_000.0 { + format!("{:.3}us", ns_f / 1_000.0) + } else if ns_f < 1_000_000_000.0 { + format!("{:.3}ms", ns_f / 1_000_000.0) + } else { + format!("{:.3}s", ns_f / 1_000_000_000.0) + }; + + write!(f, "{}", format) + } +} + +/// Print a section header. +pub fn report_header(header: String) { + let pad = "-"; + let total = 97 - header.len(); + let left = total / 2; + let right = total - left; + + println!("\n{}{}{}", pad.repeat(left), header, pad.repeat(right),); +} + +/// Print a report for a counter group. 
+pub fn report(counters: &[&Counter]) { + const NAME_W: usize = 60; + const CALLS_W: usize = 10; + const NUM_W: usize = 12; + + let mut rows: Vec = Vec::new(); + + for c in counters { + let calls = c.calls.load(Ordering::Relaxed); + if calls == 0 { + continue; + } + + let cycles = match c.timer_kind() { + TimerKind::Rdtsc => format!("{:#?}", c.cycles.load(Ordering::Relaxed)), + TimerKind::Clock => format!( + "{}", + PrettyDuration(Duration::from_nanos(c.cycles.load(Ordering::Relaxed))) + ), + }; + + let avg = match c.timer_kind() { + TimerKind::Rdtsc => format!("{:#?}", c.cycles.load(Ordering::Relaxed) / calls), + TimerKind::Clock => format!( + "{}", + PrettyDuration(Duration::from_nanos( + c.cycles.load(Ordering::Relaxed) / calls + )) + ), + }; + + rows.push(format!( + "{:CALLS_W$} {:>NUM_W$} {:>NUM_W$}", + c.name, calls, cycles, avg, + )); + } + + if rows.is_empty() { + return; + } + + eprintln!( + "{:CALLS_W$} {:>NUM_W$} {:>NUM_W$}", + "name", "calls", "total", "avg", + ); + eprintln!("{}", "-".repeat(NAME_W + CALLS_W + NUM_W * 2 + 3)); + + for row in rows { + eprintln!("{}", row); + } + + println!(""); +} diff --git a/src/lind-perf/src/enabled/timers.rs b/src/lind-perf/src/enabled/timers.rs new file mode 100644 index 000000000..c1945ca55 --- /dev/null +++ b/src/lind-perf/src/enabled/timers.rs @@ -0,0 +1,65 @@ +/// TimerKind defines the timer-backend to be used for benchmarks. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum TimerKind { + Rdtsc = 0, + Clock = 1, +} + +/// Get the default timer. +pub const fn default_timer_kind() -> TimerKind { + TimerKind::Clock +} + +/// Public functions to record start and end times depending on the TimerKind being used. 
+#[inline(always)] +pub fn read_start(kind: TimerKind) -> u64 { + match kind { + TimerKind::Rdtsc => rdtsc_start(), + TimerKind::Clock => clock_now(), + } +} + +#[inline(always)] +pub fn read_end(kind: TimerKind) -> u64 { + match kind { + TimerKind::Rdtsc => rdtsc_end(), + TimerKind::Clock => clock_now(), + } +} + +#[inline(always)] +fn rdtsc_start() -> u64 { + #[cfg(target_arch = "x86_64")] + unsafe { + core::arch::x86_64::_mm_lfence(); + return core::arch::x86_64::_rdtsc(); + } + clock_now() +} + +#[inline(always)] +fn rdtsc_end() -> u64 { + #[cfg(target_arch = "x86_64")] + unsafe { + let mut aux = 0u32; + let tsc = core::arch::x86_64::__rdtscp(&mut aux); + core::arch::x86_64::_mm_lfence(); + return tsc; + } + clock_now() +} + +#[inline(always)] +fn clock_now() -> u64 { + let mut ts = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + let rc = unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC_RAW, &mut ts) }; + if rc != 0 { + panic!("Unable to get a CLOCK_MONOTONIC_RAW time. Aborting benchmarks."); + } + (ts.tv_sec as u64) + .saturating_mul(1_000_000_000) + .saturating_add(ts.tv_nsec as u64) +} diff --git a/src/lind-perf/src/lib.rs b/src/lind-perf/src/lib.rs index d02bd71b9..3140cb791 100644 --- a/src/lind-perf/src/lib.rs +++ b/src/lind-perf/src/lib.rs @@ -1,7 +1,19 @@ -mod counter; -mod report; -mod timers; +#[cfg(not(feature = "enabled"))] +mod disabled; +#[cfg(feature = "enabled")] +mod enabled; -pub use counter::*; -pub use report::*; -pub use timers::*; +#[cfg(not(feature = "enabled"))] +pub use disabled::*; +#[cfg(feature = "enabled")] +pub use enabled::*; + +#[cfg(not(feature = "enabled"))] +pub static ENABLED: bool = false; +#[cfg(feature = "enabled")] +pub static ENABLED: bool = true; + +#[macro_export] +macro_rules! 
get_timer { + ($counter:path) => {{ $counter.get_timer() }}; +} diff --git a/src/lind-perf/src/report.rs b/src/lind-perf/src/report.rs index 449be5eca..bac63b905 100644 --- a/src/lind-perf/src/report.rs +++ b/src/lind-perf/src/report.rs @@ -1,3 +1,4 @@ +use crate::ENABLED; use crate::counter::Counter; use crate::timers::{PrettyDuration, TimerKind}; use std::sync::atomic::Ordering; @@ -5,6 +6,9 @@ use std::time::Duration; /// Print a section header. pub fn report_header(header: String) { + if !ENABLED { + return; + } let pad = "-"; let total = 97 - header.len(); let left = total / 2; @@ -17,6 +21,9 @@ pub fn report_header(header: String) { /// /// The report is sorted by definition order, not by cost. pub fn report(counters: &[&Counter]) { + if !ENABLED { + return; + } // Tunable constants const NAME_W: usize = 60; const CALLS_W: usize = 10; diff --git a/src/lind-perf/src/timers.rs b/src/lind-perf/src/timers.rs deleted file mode 100644 index 5b6a91935..000000000 --- a/src/lind-perf/src/timers.rs +++ /dev/null @@ -1,103 +0,0 @@ -use std::time::Duration; - -/// Formats nanosecond totals for reports. Converts nanosecond input to larger units where -/// appropriate and truncates to 3 decimal points. -pub struct PrettyDuration(pub Duration); - -impl std::fmt::Display for PrettyDuration { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let ns_f = self.0.as_nanos() as f64; - - let format = if ns_f < 1_000.0 { - format!("{:.3}ns", ns_f) - } else if ns_f < 1_000_000.0 { - format!("{:.3}µs", ns_f / 1_000.0) - } else if ns_f < 1_000_000_000.0 { - format!("{:.3}ms", ns_f / 1_000_000.0) - } else { - format!("{:.3}s", ns_f / 1_000_000_000.0) - }; - - write!(f, "{}", format) - } -} - -/// TimerKind defines the timer-backend to be used for benchmarks. We support two kinds of timers -/// currently, -/// -/// RDTSC: Time Stamp Counter that counts the number of CPU cycles that have elapsed. 
-/// Clock: Uses CLOCK_MONOTONIC_RAW to get the current time in nanoseconds. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum TimerKind { - Rdtsc = 0, - Clock = 1, -} - -/// Get the default timer. -pub const fn default_timer_kind() -> TimerKind { - TimerKind::Clock -} - -/// Public functions to record start and end times depending on the TimerKind being used. -#[inline(always)] -pub fn read_start(kind: TimerKind) -> u64 { - match kind { - TimerKind::Rdtsc => rdtsc_start(), - TimerKind::Clock => clock_now(), - } -} - -#[inline(always)] -pub fn read_end(kind: TimerKind) -> u64 { - match kind { - TimerKind::Rdtsc => rdtsc_end(), - TimerKind::Clock => clock_now(), - } -} - -#[inline(always)] -fn rdtsc_start() -> u64 { - // RDTSC is only available of x864 machines. - // In case this API is not exposed, default back to Clock. - #[cfg(target_arch = "x86_64")] - unsafe { - // From Intel's documentation : - // - // Perform a serializing operation on all load-from-memory instructions that were - // issued prior to this instruction. Guarantees that every load instruction that precedes, - // in program order, is globally visible before any load instruction which follows - // the fence in program order. - core::arch::x86_64::_mm_lfence(); - return core::arch::x86_64::_rdtsc(); - } - return clock_now(); -} - -#[inline(always)] -fn rdtsc_end() -> u64 { - #[cfg(target_arch = "x86_64")] - unsafe { - // End the TSC timer. - let mut aux = 0u32; - let tsc = core::arch::x86_64::__rdtscp(&mut aux); - // End the load fence. - core::arch::x86_64::_mm_lfence(); - return tsc; - } - return clock_now(); -} - -#[inline(always)] -fn clock_now() -> u64 { - let mut ts = libc::timespec { - tv_sec: 0, - tv_nsec: 0, - }; - let rc = unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC_RAW, &mut ts) }; - if rc != 0 { - panic!("Unable to get a CLOCK_MONOTONIC_RAW time. 
Aborting benchmarks."); - } - return (ts.tv_sec as u64) - .saturating_mul(1_000_000_000) - .saturating_add(ts.tv_nsec as u64); -} diff --git a/src/rawposix/Cargo.toml b/src/rawposix/Cargo.toml index 9e603cf3d..aa88c9a88 100644 --- a/src/rawposix/Cargo.toml +++ b/src/rawposix/Cargo.toml @@ -16,12 +16,12 @@ sysdefs = { path = "../sysdefs" } typemap = { path = "../typemap" } cage = { path = "../cage" } threei = { path = "../threei" } -lind-perf = { path = "../lind-perf", optional=true } +lind-perf = { path = "../lind-perf" } [features] default = ["fast"] fast = [] secure = [] -lind_perf = ["dep:lind-perf"] +lind_perf = ["lind-perf/enabled"] [dev-dependencies] diff --git a/src/threei/Cargo.toml b/src/threei/Cargo.toml index e88d65142..f16f42f4f 100644 --- a/src/threei/Cargo.toml +++ b/src/threei/Cargo.toml @@ -13,13 +13,13 @@ once_cell = "1.18" lazy_static = "1.4" parking_lot = "0.12" nodit = "0.9.2" # Used for VMMAP -lind-perf = { path = "../lind-perf", optional = true } +lind-perf = { path = "../lind-perf" } [features] default = ["hashmap"] hashmap = [] dashmap = [] -lind_perf = ["dep:lind-perf"] +lind_perf = ["lind-perf/enabled"] [dev-dependencies] serial_test = "3" diff --git a/src/wasmtime/crates/lind-common/Cargo.toml b/src/wasmtime/crates/lind-common/Cargo.toml index ae0b20bfc..5f44e091e 100644 --- a/src/wasmtime/crates/lind-common/Cargo.toml +++ b/src/wasmtime/crates/lind-common/Cargo.toml @@ -22,8 +22,8 @@ sysdefs = { path = "../sysdefs" } wasmtime-lind-3i = { path = "../lind-3i" } cage = { path = "../cage" } typemap = { path = "../typemap" } -lind-perf = { path = "../lind-perf", optional = true } +lind-perf = { path = "../lind-perf" } [features] lind_debug = [] -lind_perf = ["dep:lind-perf", "threei/lind_perf", "rawposix/lind_perf"] +lind_perf = ["lind-perf/enabled", "threei/lind_perf", "rawposix/lind_perf"] From 1fd4127536b76a2b37957f4f34b2978885fef51e Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Fri, 27 Feb 2026 01:29:13 -0500 Subject: [PATCH 03/10] 
Remove non commit-able files --- src/lind-boot/.cargo/config.toml | 2 -- src/lind-perf/Cargo.lock | 16 ---------------- 2 files changed, 18 deletions(-) delete mode 100644 src/lind-boot/.cargo/config.toml delete mode 100644 src/lind-perf/Cargo.lock diff --git a/src/lind-boot/.cargo/config.toml b/src/lind-boot/.cargo/config.toml deleted file mode 100644 index 00deea9ff..000000000 --- a/src/lind-boot/.cargo/config.toml +++ /dev/null @@ -1,2 +0,0 @@ -[build] -features = ["lind_perf"] diff --git a/src/lind-perf/Cargo.lock b/src/lind-perf/Cargo.lock deleted file mode 100644 index 27c6f0a05..000000000 --- a/src/lind-perf/Cargo.lock +++ /dev/null @@ -1,16 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "libc" -version = "0.2.182" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" - -[[package]] -name = "lind-perf" -version = "0.1.0" -dependencies = [ - "libc", -] From 0aa5cd2bf22c210fae09dcbb214e755bc29c3abe Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Fri, 27 Feb 2026 02:53:40 -0500 Subject: [PATCH 04/10] lind-perf refactors. updated docs. 
--- src/lind-boot/src/README.md | 38 ++++- src/lind-boot/src/cli.rs | 12 +- src/lind-boot/src/lind_wasmtime/trampoline.rs | 13 +- src/lind-boot/src/main.rs | 4 + src/lind-boot/src/perf.rs | 14 +- src/lind-perf/Cargo.toml | 4 +- src/lind-perf/README.md | 144 ++++++++---------- src/lind-perf/src/disabled.rs | 50 +----- src/lind-perf/src/enabled/counter.rs | 17 ++- src/lind-perf/src/enabled/mod.rs | 2 - src/lind-perf/src/lib.rs | 8 + src/lind-perf/src/report.rs | 81 ---------- src/lind-perf/src/{enabled => }/timers.rs | 38 ++++- 13 files changed, 191 insertions(+), 234 deletions(-) delete mode 100644 src/lind-perf/src/report.rs rename src/lind-perf/src/{enabled => }/timers.rs (53%) diff --git a/src/lind-boot/src/README.md b/src/lind-boot/src/README.md index 030b09ca9..3e0281f24 100644 --- a/src/lind-boot/src/README.md +++ b/src/lind-boot/src/README.md @@ -10,7 +10,8 @@ At a high level, lind-boot sits at the boundary between the command-line interfa src/ ├── main.rs ├── cli.rs -└── lind-wasmtime/ +├── perf.rs +└── lind_wasmtime/ ├── mod.rs ├── execute.rs ├── host.rs @@ -32,7 +33,10 @@ Supported flags: ```sh --verbose --debug + --precompile + --wasmtime-backtrace --env NAME[=VAL] + --perf[=clock|tsc] ``` ## Design Overview @@ -43,6 +47,11 @@ From the user’s perspective, lind-boot behaves like a conventional process lau Execution begins in main.rs, where command-line arguments are parsed and passed to the core execution logic. The entry point accepts a WebAssembly binary followed by program arguments. +The control flow is: +1. Handle `--precompile` early and exit. +2. If `--perf` is set, try to run in benchmark mode. +3. Otherwise run the normal single-execution path. + ### host.rs Host-side runtime state is encapsulated in `HostCtx`, defined in host.rs. This structure holds the WASI Preview1 context, the WASI threads context, and the Lind multi-process context. 
@@ -59,9 +68,34 @@ Before execution begins, lind-boot attaches all required host-side APIs to the W Module instantiation occurs in `load_main_module`. The WebAssembly module is instantiated inside a Lind cage, after which the runtime checks for and invokes the `main` function because of our glibc modification. The main entry point is then resolved, stack bounds are initialized, and signal and epoch-related state is set up for the main thread of the cage. At this point, the WebAssembly program is fully initialized and starts running code logic. -One responsibility of lind-boot is capturing and managing Wasmtime’s internal `VMContext` pointers. After instantiation, lind-boot extracts the `VMContext` associated with the running instance and stores it in a global table indexed by cage ID. Additional backup instances are created to populate a pool of `VMContext`s that can be reused during grate calls and syscall re-entry. (See more comments on lind-wasm/src/wasmtime/crates/lind-3i) +One responsibility of lind-boot is capturing and managing Wasmtime’s internal `VMContext` pointers. After instantiation, lind-boot extracts the `VMContext` associated with the running instance and stores it in a global table indexed by cage ID. Additional backup instances are created to populate a pool of `VMContext`s that can be reused during grate calls and syscall re-entry. (See more comments on `src/wasmtime/crates/lind-3i`) ### trampoline.rs The re-entry mechanism is implemented in trampoline.rs. When 3i routes a syscall to a grate, it invokes a unified callback function registered by lind-boot. This trampoline retrieves the appropriate `VMContext` for the target cage, re-enters the Wasmtime runtime using `Caller::with`, and invokes a unified entry function inside the WebAssembly module. Control is then dispatched to the appropriate syscall implementation based on the function pointer originally registered with 3i. 
Once execution completes, the VMContext is returned to the global pool for future use. +### perf.rs + +`perf.rs` defines `lind-perf` counters for lind-boot and dependency crates which are used by `main.rs` to run benchmarks. + +`lind-boot` supports performance benchmarking via `--perf`, with optional timer selection: + +```sh +# Default timer backend is CLOCK_MONOTONIC_RAW +lind-boot --perf=clock program.wasm + +# `=clock` is optional +lind-boot --perf program.wasm + +# Cycle-counter backend (RDTSC/RDTSCP on x86_64) +lind-boot --perf=tsc program.wasm +``` + +Perf mode runs the same workload multiple times, enabling one counter per run, then prints a final report. + +Important behavior: + +1. `--perf` is accepted by the CLI regardless of build mode. +2. If lind-boot is compiled without the crate feature `lind_perf`, `--perf` exits early with an explicit error. +3. The `lind_perf` feature in lind-boot enables `lind-perf/enabled`, which turns timing/reporting on. Without it, `lind-perf` stays linked but behaves as no-op. + diff --git a/src/lind-boot/src/cli.rs b/src/lind-boot/src/cli.rs index 58aec37a7..b573dca28 100644 --- a/src/lind-boot/src/cli.rs +++ b/src/lind-boot/src/cli.rs @@ -2,7 +2,9 @@ use clap::*; #[derive(Debug, Clone, Copy, ValueEnum)] pub enum PerfTimer { + /// Use `clock_gettime(CLOCK_MONOTONIC_RAW)` based timing. Clock, + /// Use RDTSC/RDTSCP cycle counter timing. Tsc, } @@ -48,7 +50,11 @@ pub struct CliOptions { /// Get performance information for the running module. /// - /// Requires compilation with `lind_perf` feature. + /// `--perf` defaults to `clock`; pass `--perf=tsc` for cycle-based timing. + /// + /// `--perf` is always accepted by the CLI, but execution only proceeds when + /// lind-boot is compiled with the crate feature `lind_perf` (which wires + /// `lind-perf/enabled`). 
#[arg( long, value_enum, @@ -74,6 +80,10 @@ impl CliOptions { } pub fn perf_timer_kind(&self) -> Option { + // Runtime gate for the perf CLI path: + // - if lind-boot was compiled without `lind_perf`, reject `--perf` early + // with a clear error. + // - otherwise map the CLI timer selection to lind-perf's timer backend. match lind_perf::ENABLED { false => match self.perf { Some(_) => { diff --git a/src/lind-boot/src/lind_wasmtime/trampoline.rs b/src/lind-boot/src/lind_wasmtime/trampoline.rs index 35b985c59..6e2c0348e 100644 --- a/src/lind-boot/src/lind_wasmtime/trampoline.rs +++ b/src/lind-boot/src/lind_wasmtime/trampoline.rs @@ -47,16 +47,16 @@ pub extern "C" fn grate_callback_trampoline( arg6: u64, arg6cageid: u64, ) -> i32 { + // This timer measures the entire function since it is never explicitly dropped. The timer + // therefore ends only when the function exits. let _grate_callback_timer = lind_perf::get_timer!(perf::GRATE_CALLBACK_TRAMPOLINE); - let vmctx_timer = lind_perf::get_timer!(perf::TRAMPOLINE_GET_VMCTX); let vmctx_wrapper: VmCtxWrapper = match get_vmctx(cageid) { Some(v) => v, None => { panic!("no VMContext found for cage_id {}", cageid); } }; - drop(vmctx_timer); // Convert back to VMContext let opaque: *mut VMOpaqueContext = vmctx_wrapper.as_ptr() as *mut VMOpaqueContext; @@ -97,8 +97,11 @@ pub extern "C" fn grate_callback_trampoline( u64, ), i32>(&mut store)?; + // This timer is used for a smaller snippet rather than an entire function or a scope, + // and therefore gets dropped manually to record the end time. 
+ let _typed_func = lind_perf::get_timer!(perf::TYPED_FUNC_CALL); // Call the entry function with all arguments and in grate function pointer - typed_func.call( + let ret = typed_func.call( &mut store, ( in_grate_fn_ptr_u64, @@ -116,7 +119,9 @@ pub extern "C" fn grate_callback_trampoline( arg6, arg6cageid, ), - ) + ); + drop(_typed_func); + ret }) .unwrap_or(threei_const::GRATE_ERR) }; diff --git a/src/lind-boot/src/main.rs b/src/lind-boot/src/main.rs index 956f043a9..9d9eb8fb7 100644 --- a/src/lind-boot/src/main.rs +++ b/src/lind-boot/src/main.rs @@ -28,6 +28,9 @@ fn main() -> Result<(), Box> { return Ok(()); } + // Perf mode is a "one counter per run" workflow: + // initialize counters once, then rerun the same workload with each counter + // exclusively enabled so measurements do not overlap. if let Some(kind) = lindboot_cli.perf_timer_kind() { perf::perf_init(kind); @@ -36,6 +39,7 @@ fn main() -> Result<(), Box> { for counter in counters { perf::enable_one_counter(counter); + // Each perf sample gets a fresh RawPOSIX lifecycle boundary. rawposix_start(0); let _ = execute_wasmtime(lindboot_cli.clone()); rawposix_shutdown(); diff --git a/src/lind-boot/src/perf.rs b/src/lind-boot/src/perf.rs index 434d30533..356cbdb19 100644 --- a/src/lind-boot/src/perf.rs +++ b/src/lind-boot/src/perf.rs @@ -11,21 +11,25 @@ use lind_perf::{Counter, TimerKind}; // These are counters defined within lind-boot. pub static GRATE_CALLBACK_TRAMPOLINE: Counter = Counter::new("lind_boot::grate_callback_trampoline"); -pub static TRAMPOLINE_GET_VMCTX: Counter = Counter::new("lind_boot::trampoline::get_vmctx"); +pub static TYPED_FUNC_CALL: Counter = Counter::new("lind_boot::typed_func_call"); -pub static LIND_BOOT_COUNTERS: &[&Counter] = &[&GRATE_CALLBACK_TRAMPOLINE, &TRAMPOLINE_GET_VMCTX]; +// Counter list used by the perf runner in `main.rs`. Each benchmark iteration +// enables exactly one counter name from this list. 
+pub static LIND_BOOT_COUNTERS: &[&Counter] = &[&GRATE_CALLBACK_TRAMPOLINE, &TYPED_FUNC_CALL]; /// Initialize counters for all modules, involves setting the TimerKind and resetting the /// counts. pub fn perf_init(kind: TimerKind) { + // Configure timer backend (Clock or TSC) for all local counters. lind_perf::set_timer(LIND_BOOT_COUNTERS, kind); - lind_perf::reset_all(LIND_BOOT_COUNTERS); + // Reset all accumulated measurements before benchmark runs begin. + lind_perf::reset_all_counters(LIND_BOOT_COUNTERS); } /// Finds a counter by it's name and searches for it across modules to enable it. Disables all /// other counters. pub fn enable_one_counter(name: &str) { - lind_perf::enable_name(LIND_BOOT_COUNTERS, name); + lind_perf::enable_counter_by_name(LIND_BOOT_COUNTERS, name); } /// Get a list of all counter names. @@ -38,6 +42,8 @@ pub fn all_counter_names() -> Vec<&'static str> { /// Print a report for every module. pub fn perf_report() { + // Note: `lind_perf::report*` are no-ops when lind-perf is built without + // its internal `enabled` feature. lind_perf::report_header(format!("LIND-BOOT")); lind_perf::report(LIND_BOOT_COUNTERS); } diff --git a/src/lind-perf/Cargo.toml b/src/lind-perf/Cargo.toml index 02a95ab80..6d0147707 100644 --- a/src/lind-perf/Cargo.toml +++ b/src/lind-perf/Cargo.toml @@ -5,7 +5,7 @@ edition = "2024" [features] default = [] -enabled = ["dep:libc"] +enabled = [] [dependencies] -libc = { version = "0.2", optional = true } +libc = "0.2" diff --git a/src/lind-perf/README.md b/src/lind-perf/README.md index 1b0ac9376..9140f59b9 100644 --- a/src/lind-perf/README.md +++ b/src/lind-perf/README.md @@ -1,112 +1,88 @@ # lind-perf -`lind-perf` is a microbenchmarking library for lind-wasm. It generates timing reports for hot -paths in the syscall lifecycle by measuring the total time spent in specific functions across -modules. - -Sample output for running `close(-1)`: - -```bash -FDTABLE Test ................ 
---------------------------------------------LIND-BOOT-------------------------------------------- -name calls total avg -------------------------------------------------------------------------------------------------- -lind_boot::load_main_module 1 111.482ms 111.482ms -lind_boot::invoke_func 1 111.282ms 111.282ms - --------------------------------------------LIND-COMMON------------------------------------------- -name calls total avg -------------------------------------------------------------------------------------------------- -lind_common::add_to_linker::make-syscall 1000000 94.274ms 94.000ns - ----------------------------------------------THREEI---------------------------------------------- -name calls total avg -------------------------------------------------------------------------------------------------- -threei::make_syscall 1000000 90.815ms 90.000ns - ---------------------------------------------RAWPOSIX--------------------------------------------- -name calls total avg -------------------------------------------------------------------------------------------------- -rawposix::close_syscall 1000000 21.255ms 21.000ns - ---------------------------------------------FDTABLES--------------------------------------------- -name calls total avg -------------------------------------------------------------------------------------------------- -fdtables::close_virtualfd 1000000 14.372ms 14.000ns -``` +`lind-perf` is the instrumentation crate used by Lind crates (for example `lind-boot`) to measure hot paths. + +This crate is defined in a manner where the callsites remain clean i.e. without needing conditional flags. This is implemented using the `enabled` feature. -## Building +The public APIs used remain the same, but if the crate is compiled without the `enabled` feature, each operation is a no-op ensuring that the final binary is not polluted with unused codepaths. 
-`lind-perf` is only included in the final binary if `--features lind_perf` is set during build. +## Build Modes -`make lind-boot-perf` is a shorthand for building a `release` version of `lind-boot` with `lind-perf` enabled. +`lind-perf` supports two compile-time modes via Cargo feature `enabled`: -## Running Benchmarks +1. `enabled` on: real counter accumulation + real reporting. +2. `enabled` off (default): API-compatible no-op behavior. -`lind-perf` will generate a report for any module that is run using `lind-boot` with the -`--perf` or `--perftsc` flag. +`lind-boot` maps its crate feature `lind_perf` to `lind-perf/enabled`. -e.g. `sudo lind-boot --perf libc_syscall.wasm` +## Public API -Standard benchmarks can be run using: [`./scripts/run_microbench.sh`](../../scripts/run_microbench.sh) +Main exports: -Flags: -- `--perf`: Uses the default Clock timer (nanoseconds) -- `--perftsc`: Uses the `rdtsc` timer (CPU cycles) +- `struct Counter` : Responsible for recording information such as cycles spent, calls made for a benchmarking site. +- `TimerKind::{Clock, Rdtsc}` : Clock uses `CLOCK_MONOTONIC_RAW`, Rdtsc: Time Stamp Counter. +- `fn set_timer(...)` : Set timer kind for list of Counters. +- `fn reset_all_counters(...)` : Reset Counters. +- `fn enable_counter_by_name(...)` : Enable Counter that matches the input name, disable the rest. +- `fn report(...)` : Print results from a set of counters. +- `static ENABLED: bool` : Check if `lind-perf` uses the `enabled` feature. -## Internals +Macro: -### How the timer works -Each benchmark site is a `Counter`. A counter tracks: -- total elapsed time across calls -- number of calls +- `lind_perf::get_timer!(COUNTER_PATH)` : Used to introduce a timer to a scope and start it. -Timing is scoped. The common pattern is: -1. Create a guard at the start of the function. -2. The guard records the start time immediately. -3. When the function returns, the guard is dropped and records the end time. -4. 
The elapsed time is added to the counter total and the call count increments. +## Typical Usage -This means early returns are timed as well. If the guard is dropped before the work -finishes (e.g., because of a `return foo(...)` expression), the measurement will be too -small. Keep the guard alive until after the work: +Define a counter: ```rust -let _scope = perf::enabled::YOUR_COUNTER.scope(); -let ret = (|| { +pub static MY_COUNTER: lind_perf::Counter = lind_perf::Counter::new("my_crate::my_counter"); +``` + +Use a timer to time a scope: + +```rust +(|| { + let _timer = lind_perf::get_timer!(crate::perf::MY_COUNTER); // Starts the timer // measured work - ... -})(); -std::hint::black_box(&_scope); // Tells Rust to be pessimistic about optimizing this variable. -ret +})(); // Timer stops when dropped. ``` -### Ensuring only one active timer -`lind-boot` runs the benchmark module once per counter. On each run it enables exactly one -counter and disables the rest, then prints a report. This avoids stacked measurement overhead -from multiple counters running at the same time. +Timers can also be dropped manually for timing non-scope snippets: -The logic for this can be seen in [`lind-boot/src/main.rs`](../lind-boot/src/main.rs) +```rust +let _timer = lind_perf::get_timer!(crate::perf::MY_COUNTER); // Starts the timer +// measured work +drop(_timer); // Explicit drop +``` -### Adding a new benchmark site -Suppose we want to add a new timer in `threei` for the `copy_data_between_cages` function. We will need to make the following changes: +Counters can be enabled or disabled during runtime. The most common use-case for this is to sequentially enable a timer exclusively to avoid performance overheads. -1. Add a counter in `src/threei/src/perf.rs` and include it in `ALL_COUNTERS`. -2. Add a scoped timer in `src/threei/src/threei.rs` at the top of the `copy_data_between_cages` function. -3.
Keep the guard alive until after the measured work if the function has multiple return paths. This can be done by moving measured work into an unnamed scope, and using the `std::hint::black_box` to avoid the scope being optimized out early. +```rust +lind_perf::set_timer(ALL_COUNTERS, lind_perf::TimerKind::Clock); +lind_perf::reset_all_counters(ALL_COUNTERS); +lind_perf::enable_counter_by_name(ALL_COUNTERS, "my_crate::my_counter"); +``` -In case we want to only benchmark a snippet of a function instead of the entire thing, we can `drop` the scope manually: +Print report: ```rust -let scope = perf::enabled::YOUR_COUNTER.scope(); -// measured snippet -drop(scope); +lind_perf::report_header("MY-CRATE".to_string()); +lind_perf::report(ALL_COUNTERS); ``` -### Adding a new crate -Currently the crates that are supported are `wasmtime_lind_common`, `fdtables`, `rawposix`, and `threei`. In order to add support for a new crate, the following changes are needed: +## Disabled Mode Semantics + +When `enabled` is not set: + +- `Counter` is a lightweight no-op type. +- `get_timer!` returns a no-op scope guard. +- `set_timer/reset_all_counters/enable_counter_by_name/report*` are no-ops. +- `read_start/read_end` return `0`. + +This allows instrumentation to remain in code without `cfg` guards at callsites. + +## Timer Backends -1. Add a `perf.rs` module to the new crate and define counters plus `ALL_COUNTERS`. -2. Export `ALL_COUNTERS` from the crate’s `perf` module. -3. Add the crate’s counters to `lind-boot` enumeration, enable/reset, and reporting. -4. Rebuild `lind-boot` with `--features lind_perf` to include the new module. +- `TimerKind::Clock`: uses `clock_gettime(CLOCK_MONOTONIC_RAW)` in enabled mode. +- `TimerKind::Rdtsc`: uses RDTSC/RDTSCP on `x86_64` (falls back to clock timing on non-`x86_64`).
diff --git a/src/lind-perf/src/disabled.rs b/src/lind-perf/src/disabled.rs index c6ba0fb68..7c6d752bc 100644 --- a/src/lind-perf/src/disabled.rs +++ b/src/lind-perf/src/disabled.rs @@ -1,47 +1,4 @@ -use std::time::Duration; - -/// Formats nanosecond totals for reports. -pub struct PrettyDuration(pub Duration); - -impl std::fmt::Display for PrettyDuration { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let ns_f = self.0.as_nanos() as f64; - - let format = if ns_f < 1_000.0 { - format!("{:.3}ns", ns_f) - } else if ns_f < 1_000_000.0 { - format!("{:.3}us", ns_f / 1_000.0) - } else if ns_f < 1_000_000_000.0 { - format!("{:.3}ms", ns_f / 1_000_000.0) - } else { - format!("{:.3}s", ns_f / 1_000_000_000.0) - }; - - write!(f, "{}", format) - } -} - -/// TimerKind exists in both enabled and disabled builds for API consistency. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum TimerKind { - Rdtsc = 0, - Clock = 1, -} - -/// Get the default timer. -pub const fn default_timer_kind() -> TimerKind { - TimerKind::Clock -} - -#[inline(always)] -pub fn read_start(_kind: TimerKind) -> u64 { - 0 -} - -#[inline(always)] -pub fn read_end(_kind: TimerKind) -> u64 { - 0 -} +use crate::timers::TimerKind; /// Lightweight no-op counter representation for disabled builds. pub struct Counter; @@ -100,11 +57,12 @@ impl Drop for Scope { fn drop(&mut self) {} } -pub fn reset_all(_counters: &[&Counter]) {} +// No-op implementations for the rest. 
+pub fn reset_all_counters(_counters: &[&Counter]) {} pub fn set_timer(_counters: &[&Counter], _kind: TimerKind) {} -pub fn enable_name(_counters: &[&Counter], _name: &str) {} +pub fn enable_counter_by_name(_counters: &[&Counter], _name: &str) {} pub fn report_header(_header: String) {} diff --git a/src/lind-perf/src/enabled/counter.rs b/src/lind-perf/src/enabled/counter.rs index a640e90b8..413b90868 100644 --- a/src/lind-perf/src/enabled/counter.rs +++ b/src/lind-perf/src/enabled/counter.rs @@ -1,14 +1,22 @@ -use crate::{TimerKind, default_timer_kind, read_end, read_start}; +use crate::timers::{TimerKind, default_timer_kind, read_end, read_start}; use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; /// Counter stores information pertaining to a specific benchmarking site. +/// +/// Timing starts when the counter is introduced to the scope using get_timer() and ends once it +/// gets dropped. pub struct Counter { /// Counts the total number of CPU cycles or Nanoseconds spent. pub cycles: AtomicU64, /// Counts the total number of invocations. pub calls: AtomicU64, + /// Globally unique name for the counter. pub name: &'static str, - /// Only one Counter is globally enabled during a given run. + /// Depending on the workflow being timed, having multiple timers enabled at the same time can + /// lead to unacceptable timing overheads. To avoid this, each Counter can be dynamically + /// enabled or disabled. + /// + /// When disabled, operations such as start() or end() are no-ops. pub enabled: AtomicBool, /// Stores TimerKind. timer: AtomicU8, @@ -26,6 +34,7 @@ impl Counter { } } + /// Get the name of the Counter pub fn get_name(&self) -> Option<&'static str> { Some(self.name) } @@ -106,7 +115,7 @@ impl Drop for Scope<'_> { } /// Reset all counters in a group. 
-pub fn reset_all(counters: &[&Counter]) { +pub fn reset_all_counters(counters: &[&Counter]) { for c in counters { c.reset(); } @@ -120,7 +129,7 @@ pub fn set_timer(counters: &[&Counter], kind: TimerKind) { } /// Enable only the named counter in a group. -pub fn enable_name(counters: &[&Counter], name: &str) { +pub fn enable_counter_by_name(counters: &[&Counter], name: &str) { for c in counters { if c.name == name { c.enable(); diff --git a/src/lind-perf/src/enabled/mod.rs b/src/lind-perf/src/enabled/mod.rs index d02bd71b9..5fb15b0bd 100644 --- a/src/lind-perf/src/enabled/mod.rs +++ b/src/lind-perf/src/enabled/mod.rs @@ -1,7 +1,5 @@ mod counter; mod report; -mod timers; pub use counter::*; pub use report::*; -pub use timers::*; diff --git a/src/lind-perf/src/lib.rs b/src/lind-perf/src/lib.rs index 3140cb791..17b465bb4 100644 --- a/src/lind-perf/src/lib.rs +++ b/src/lind-perf/src/lib.rs @@ -1,13 +1,19 @@ +mod timers; + #[cfg(not(feature = "enabled"))] mod disabled; #[cfg(feature = "enabled")] mod enabled; +pub use timers::*; + #[cfg(not(feature = "enabled"))] pub use disabled::*; #[cfg(feature = "enabled")] pub use enabled::*; +// Exported runtime flag used by callers (for example lind-boot CLI handling) +// to decide whether a requested perf mode can actually run. #[cfg(not(feature = "enabled"))] pub static ENABLED: bool = false; #[cfg(feature = "enabled")] @@ -15,5 +21,7 @@ pub static ENABLED: bool = true; #[macro_export] macro_rules! get_timer { + // Always available macro. In disabled builds, `get_timer()` returns a + // no-op scope object from `disabled::Counter`. 
($counter:path) => {{ $counter.get_timer() }}; } diff --git a/src/lind-perf/src/report.rs b/src/lind-perf/src/report.rs deleted file mode 100644 index bac63b905..000000000 --- a/src/lind-perf/src/report.rs +++ /dev/null @@ -1,81 +0,0 @@ -use crate::ENABLED; -use crate::counter::Counter; -use crate::timers::{PrettyDuration, TimerKind}; -use std::sync::atomic::Ordering; -use std::time::Duration; - -/// Print a section header. -pub fn report_header(header: String) { - if !ENABLED { - return; - } - let pad = "-"; - let total = 97 - header.len(); - let left = total / 2; - let right = total - left; - - println!("\n{}{}{}", pad.repeat(left), header, pad.repeat(right),); -} - -/// Print a report for a counter group. -/// -/// The report is sorted by definition order, not by cost. -pub fn report(counters: &[&Counter]) { - if !ENABLED { - return; - } - // Tunable constants - const NAME_W: usize = 60; - const CALLS_W: usize = 10; - const NUM_W: usize = 12; - - let mut rows: Vec = Vec::new(); - - for c in counters { - let calls = c.calls.load(Ordering::Relaxed); - if calls == 0 { - continue; - } - - let cycles = match c.timer_kind() { - TimerKind::Rdtsc => format!("{:#?}", c.cycles.load(Ordering::Relaxed)), - TimerKind::Clock => format!( - "{}", - PrettyDuration(Duration::from_nanos(c.cycles.load(Ordering::Relaxed))) - ), - }; - - let avg = match c.timer_kind() { - TimerKind::Rdtsc => format!("{:#?}", c.cycles.load(Ordering::Relaxed) / calls), - TimerKind::Clock => format!( - "{}", - PrettyDuration(Duration::from_nanos( - c.cycles.load(Ordering::Relaxed) / calls - )) - ), - }; - - // {:CALLS_W$} {:>NUM_W$} {:>NUM_W$}", - c.name, calls, cycles, avg, - )); - } - - if rows.len() == 0 { - return; - } - - eprintln!( - "{:CALLS_W$} {:>NUM_W$} {:>NUM_W$}", - "name", "calls", "total", "avg", - ); - - eprintln!("{}", "-".repeat(NAME_W + CALLS_W + NUM_W * 2 + 3)); - - for i in rows { - eprintln!("{}", i); - } - - println!(""); -} diff --git a/src/lind-perf/src/enabled/timers.rs 
b/src/lind-perf/src/timers.rs similarity index 53% rename from src/lind-perf/src/enabled/timers.rs rename to src/lind-perf/src/timers.rs index c1945ca55..022e9a308 100644 --- a/src/lind-perf/src/enabled/timers.rs +++ b/src/lind-perf/src/timers.rs @@ -12,43 +12,73 @@ pub const fn default_timer_kind() -> TimerKind { /// Public functions to record start and end times depending on the TimerKind being used. #[inline(always)] -pub fn read_start(kind: TimerKind) -> u64 { - match kind { +pub fn read_start(_kind: TimerKind) -> u64 { + #[cfg(feature = "enabled")] + match _kind { TimerKind::Rdtsc => rdtsc_start(), TimerKind::Clock => clock_now(), } + + #[cfg(not(feature = "enabled"))] + 0 } #[inline(always)] -pub fn read_end(kind: TimerKind) -> u64 { - match kind { +pub fn read_end(_kind: TimerKind) -> u64 { + #[cfg(feature = "enabled")] + match _kind { TimerKind::Rdtsc => rdtsc_end(), TimerKind::Clock => clock_now(), } + + #[cfg(not(feature = "enabled"))] + 0 } +// RDTSC timers for measuring CPU Cycles. +// Only available on x86 based processors. #[inline(always)] fn rdtsc_start() -> u64 { #[cfg(target_arch = "x86_64")] unsafe { + // Serialize execution before reading the TSC so that + // no prior loads or instructions are speculatively + // reordered past the timestamp read. + // + // See: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence&ig_expand=3977 core::arch::x86_64::_mm_lfence(); + + // Get the RDTSC counter. return core::arch::x86_64::_rdtsc(); } + + #[cfg(not(target_arch = "x86_64"))] clock_now() } +// Separate start/end functions are required for RDTSC because +// fencing semantics differ before and after the measurement. #[inline(always)] fn rdtsc_end() -> u64 { #[cfg(target_arch = "x86_64")] unsafe { let mut aux = 0u32; + // RDTSCP is partially serializing: it waits for prior + // instructions to complete before reading the TSC. 
let tsc = core::arch::x86_64::__rdtscp(&mut aux); + + // Fence after the read to prevent subsequent instructions + // from being speculatively executed before the timestamp. core::arch::x86_64::_mm_lfence(); return tsc; } + + #[cfg(not(target_arch = "x86_64"))] clock_now() } +// CLOCK_MONOTONIC_RAW based timer used for nanoseconds measurements. Same function can be used for +// start and end. #[inline(always)] fn clock_now() -> u64 { let mut ts = libc::timespec { From 982d3da03e7788ee5ccb1e8b7750605d7b07b57c Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Fri, 27 Feb 2026 12:49:04 -0500 Subject: [PATCH 05/10] Update comments in main.rs --- src/lind-boot/src/main.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/lind-boot/src/main.rs b/src/lind-boot/src/main.rs index 9d9eb8fb7..312c6dde0 100644 --- a/src/lind-boot/src/main.rs +++ b/src/lind-boot/src/main.rs @@ -28,23 +28,27 @@ fn main() -> Result<(), Box> { return Ok(()); } - // Perf mode is a "one counter per run" workflow: - // initialize counters once, then rerun the same workload with each counter - // exclusively enabled so measurements do not overlap. + // Check if --perf is enabled and avaible to decide whether to run in benchmarking mode. if let Some(kind) = lindboot_cli.perf_timer_kind() { + // Initialize all counters. perf::perf_init(kind); let counters = perf::all_counter_names(); + // Iterate over all counters: + // - Exclusively enable the counter + // - Run the program to gather timing data. for counter in counters { perf::enable_one_counter(counter); - // Each perf sample gets a fresh RawPOSIX lifecycle boundary. + // Each sample run gets a fresh RawPOSIX lifecycle boundary to imitate actual + // behaviour. rawposix_start(0); let _ = execute_wasmtime(lindboot_cli.clone()); rawposix_shutdown(); } + // Output final numbers to stdout. 
perf::perf_report(); return Ok(()); From d7362656cb7b435a610a8d333e52846f8c8332fd Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Fri, 27 Feb 2026 14:35:24 -0500 Subject: [PATCH 06/10] Use iterators --- src/lind-boot/src/perf.rs | 18 +++++++++--------- src/lind-perf/src/disabled.rs | 10 ++++------ src/lind-perf/src/enabled/counter.rs | 6 +++--- src/lind-perf/src/enabled/report.rs | 16 ++++++---------- 4 files changed, 22 insertions(+), 28 deletions(-) diff --git a/src/lind-boot/src/perf.rs b/src/lind-boot/src/perf.rs index 356cbdb19..42b0038c4 100644 --- a/src/lind-boot/src/perf.rs +++ b/src/lind-boot/src/perf.rs @@ -21,29 +21,29 @@ pub static LIND_BOOT_COUNTERS: &[&Counter] = &[&GRATE_CALLBACK_TRAMPOLINE, &TYPE /// counts. pub fn perf_init(kind: TimerKind) { // Configure timer backend (Clock or TSC) for all local counters. - lind_perf::set_timer(LIND_BOOT_COUNTERS, kind); + lind_perf::set_timer(all_counters(), kind); // Reset all accumulated measurements before benchmark runs begin. - lind_perf::reset_all_counters(LIND_BOOT_COUNTERS); + lind_perf::reset_all_counters(all_counters()); } /// Finds a counter by it's name and searches for it across modules to enable it. Disables all /// other counters. pub fn enable_one_counter(name: &str) { - lind_perf::enable_counter_by_name(LIND_BOOT_COUNTERS, name); + lind_perf::enable_counter_by_name(all_counters(), name); +} + +fn all_counters() -> impl Iterator { + LIND_BOOT_COUNTERS.iter().copied() } /// Get a list of all counter names. pub fn all_counter_names() -> Vec<&'static str> { - LIND_BOOT_COUNTERS - .iter() - .filter_map(|c| c.get_name()) - .collect() + all_counters().filter_map(|c| c.get_name()).collect() } /// Print a report for every module. pub fn perf_report() { // Note: `lind_perf::report*` are no-ops when lind-perf is built without // its internal `enabled` feature. 
- lind_perf::report_header(format!("LIND-BOOT")); - lind_perf::report(LIND_BOOT_COUNTERS); + lind_perf::report(LIND_BOOT_COUNTERS, format!("LIND-BOOT")); } diff --git a/src/lind-perf/src/disabled.rs b/src/lind-perf/src/disabled.rs index 7c6d752bc..91e938259 100644 --- a/src/lind-perf/src/disabled.rs +++ b/src/lind-perf/src/disabled.rs @@ -58,12 +58,10 @@ impl Drop for Scope { } // No-op implementations for the rest. -pub fn reset_all_counters(_counters: &[&Counter]) {} +pub fn reset_all_counters(_counters: impl IntoIterator) {} -pub fn set_timer(_counters: &[&Counter], _kind: TimerKind) {} +pub fn set_timer(_counters: impl IntoIterator, _kind: TimerKind) {} -pub fn enable_counter_by_name(_counters: &[&Counter], _name: &str) {} +pub fn enable_counter_by_name(_counters: impl IntoIterator, _name: &str) {} -pub fn report_header(_header: String) {} - -pub fn report(_counters: &[&Counter]) {} +pub fn report(_counters: impl IntoIterator, _header: String) {} diff --git a/src/lind-perf/src/enabled/counter.rs b/src/lind-perf/src/enabled/counter.rs index 413b90868..0d83d74d7 100644 --- a/src/lind-perf/src/enabled/counter.rs +++ b/src/lind-perf/src/enabled/counter.rs @@ -115,21 +115,21 @@ impl Drop for Scope<'_> { } /// Reset all counters in a group. -pub fn reset_all_counters(counters: &[&Counter]) { +pub fn reset_all_counters(counters: impl IntoIterator) { for c in counters { c.reset(); } } /// Set a timer for a counter group. -pub fn set_timer(counters: &[&Counter], kind: TimerKind) { +pub fn set_timer(counters: impl IntoIterator, kind: TimerKind) { for c in counters { c.set_timer_kind(kind); } } /// Enable only the named counter in a group. 
-pub fn enable_counter_by_name(counters: &[&Counter], name: &str) { +pub fn enable_counter_by_name(counters: impl IntoIterator, name: &str) { for c in counters { if c.name == name { c.enable(); diff --git a/src/lind-perf/src/enabled/report.rs b/src/lind-perf/src/enabled/report.rs index dc0036cdb..a8c2f2d75 100644 --- a/src/lind-perf/src/enabled/report.rs +++ b/src/lind-perf/src/enabled/report.rs @@ -24,22 +24,18 @@ impl std::fmt::Display for PrettyDuration { } } -/// Print a section header. -pub fn report_header(header: String) { +/// Print a report for a counter group. +pub fn report(counters: &[&Counter], header: String) { + const NAME_W: usize = 60; + const CALLS_W: usize = 10; + const NUM_W: usize = 12; + let pad = "-"; let total = 97 - header.len(); let left = total / 2; let right = total - left; println!("\n{}{}{}", pad.repeat(left), header, pad.repeat(right),); -} - -/// Print a report for a counter group. -pub fn report(counters: &[&Counter]) { - const NAME_W: usize = 60; - const CALLS_W: usize = 10; - const NUM_W: usize = 12; - let mut rows: Vec = Vec::new(); for c in counters { From 6dad95c56f2ca00d1c1363e0276ff05881724bf9 Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Fri, 27 Feb 2026 15:10:40 -0500 Subject: [PATCH 07/10] don't use iterator for report --- src/lind-perf/src/disabled.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lind-perf/src/disabled.rs b/src/lind-perf/src/disabled.rs index 91e938259..83b3aeefc 100644 --- a/src/lind-perf/src/disabled.rs +++ b/src/lind-perf/src/disabled.rs @@ -64,4 +64,4 @@ pub fn set_timer(_counters: impl IntoIterator, _kind: T pub fn enable_counter_by_name(_counters: impl IntoIterator, _name: &str) {} -pub fn report(_counters: impl IntoIterator, _header: String) {} +pub fn report(_counters: &[&Counter], _header: String) {} From 593b74e3b5d01e54877c9606fb03bf97b032f727 Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Fri, 27 Feb 2026 20:54:41 -0500 Subject: [PATCH 08/10] Fix Build: Update 
Dockerfile.e2e to include lind-perf | Fix linting errors --- Docker/Dockerfile.e2e | 6 ++++-- src/lind-boot/src/lind_wasmtime/execute.rs | 6 +++--- src/lind-boot/src/main.rs | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Docker/Dockerfile.e2e b/Docker/Dockerfile.e2e index e441ca1ad..64410ccc8 100644 --- a/Docker/Dockerfile.e2e +++ b/Docker/Dockerfile.e2e @@ -104,19 +104,21 @@ ENV PATH="/root/.cargo/bin:${PATH}" # Build lind-boot FROM base as build-lind-boot # NOTE: Using 'make' risks cache invalidation on unrelated Makefile changes -COPY --parents src/lind-boot src/wasmtime src/rawposix src/cage src/threei src/typemap src/fdtables src/sysdefs Makefile rust-toolchain.toml . +COPY --parents src/lind-boot src/wasmtime src/rawposix src/cage src/threei src/typemap src/fdtables src/sysdefs src/lind-perf Makefile rust-toolchain.toml . RUN rm -f src/wasmtime/crates/cage \ src/wasmtime/crates/threei \ src/wasmtime/crates/fdtables \ src/wasmtime/crates/typemap \ src/wasmtime/crates/sysdefs \ src/wasmtime/crates/rawposix \ + src/wasmtime/crates/lind-perf \ && ln -s ../../cage src/wasmtime/crates/cage \ && ln -s ../../threei src/wasmtime/crates/threei \ && ln -s ../../fdtables src/wasmtime/crates/fdtables \ && ln -s ../../typemap src/wasmtime/crates/typemap \ && ln -s ../../sysdefs src/wasmtime/crates/sysdefs \ - && ln -s ../../rawposix src/wasmtime/crates/rawposix + && ln -s ../../rawposix src/wasmtime/crates/rawposix \ + && ln -s ../../lind-perf src/wasmtime/crates/lind-perf RUN make lind-boot diff --git a/src/lind-boot/src/lind_wasmtime/execute.rs b/src/lind-boot/src/lind_wasmtime/execute.rs index 736d5154d..1473a1358 100644 --- a/src/lind-boot/src/lind_wasmtime/execute.rs +++ b/src/lind-boot/src/lind_wasmtime/execute.rs @@ -1,5 +1,5 @@ use crate::{cli::CliOptions, lind_wasmtime::host::HostCtx, lind_wasmtime::trampoline::*}; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{Context, Result, anyhow, bail}; use cage::signal::{lind_signal_init, 
signal_may_trigger}; use cfg_if::cfg_if; use std::ffi::c_void; @@ -13,8 +13,8 @@ use wasmtime::{ AsContextMut, Engine, Func, InstantiateType, Linker, Module, Precompiled, Store, Val, ValType, WasmBacktraceDetails, }; -use wasmtime_lind_3i::{init_vmctx_pool, rm_vmctx, set_vmctx, set_vmctx_thread, VmCtxWrapper}; -use wasmtime_lind_multi_process::{LindCtx, CAGE_START_ID, THREAD_START_ID}; +use wasmtime_lind_3i::{VmCtxWrapper, init_vmctx_pool, rm_vmctx, set_vmctx, set_vmctx_thread}; +use wasmtime_lind_multi_process::{CAGE_START_ID, LindCtx, THREAD_START_ID}; use wasmtime_lind_utils::LindCageManager; use wasmtime_wasi_threads::WasiThreadsCtx; diff --git a/src/lind-boot/src/main.rs b/src/lind-boot/src/main.rs index 19c46e6ee..b8dac38ea 100644 --- a/src/lind-boot/src/main.rs +++ b/src/lind-boot/src/main.rs @@ -67,7 +67,7 @@ fn main() -> Result<(), Box> { } // Not a precompile command, chroot to lindfs chroot_to_lindfs(); - + // Check if --perf is enabled and avaible to decide whether to run in benchmarking mode. if let Some(kind) = lindboot_cli.perf_timer_kind() { // Initialize all counters. From 4ce5aa1e3dd5b126a6dcd4936e2c7441269b98ad Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Mon, 9 Mar 2026 00:23:14 -0400 Subject: [PATCH 09/10] Use println! everywhere in report. 
--- src/lind-perf/src/enabled/report.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lind-perf/src/enabled/report.rs b/src/lind-perf/src/enabled/report.rs index a8c2f2d75..035c07638 100644 --- a/src/lind-perf/src/enabled/report.rs +++ b/src/lind-perf/src/enabled/report.rs @@ -72,14 +72,14 @@ pub fn report(counters: &[&Counter], header: String) { return; } - eprintln!( + println!( "{:CALLS_W$} {:>NUM_W$} {:>NUM_W$}", "name", "calls", "total", "avg", ); - eprintln!("{}", "-".repeat(NAME_W + CALLS_W + NUM_W * 2 + 3)); + println!("{}", "-".repeat(NAME_W + CALLS_W + NUM_W * 2 + 3)); for row in rows { - eprintln!("{}", row); + println!("{}", row); } println!(""); From 55e47d5e5db6d0052d6f5f62d48e4c1a468bb5bb Mon Sep 17 00:00:00 2001 From: Sanchit Sahay Date: Mon, 9 Mar 2026 00:25:57 -0400 Subject: [PATCH 10/10] remove .gitignore --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index 555e296a9..e6da03113 100644 --- a/.gitignore +++ b/.gitignore @@ -17,8 +17,5 @@ e2e_status report.html results.json -#Ignore local files -.DS_Store -target/ #Ignore python cache __pycache__/