From 04bdb8271d1948485005d33cc074aa123228a9dc Mon Sep 17 00:00:00 2001 From: Bernhard Schuster Date: Tue, 24 Feb 2026 21:10:33 +0100 Subject: [PATCH 1/4] chore: expose additional types for rocksdb externalization --- miden-crypto/src/merkle/smt/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/miden-crypto/src/merkle/smt/mod.rs b/miden-crypto/src/merkle/smt/mod.rs index 2dcf4ea8ee..dacacbcdaa 100644 --- a/miden-crypto/src/merkle/smt/mod.rs +++ b/miden-crypto/src/merkle/smt/mod.rs @@ -20,8 +20,8 @@ mod large; pub use full::concurrent::{SubtreeLeaf, build_subtree_for_bench}; #[cfg(feature = "concurrent")] pub use large::{ - LargeSmt, LargeSmtError, MemoryStorage, SmtStorage, StorageUpdateParts, StorageUpdates, - Subtree, SubtreeError, + LargeSmt, LargeSmtError, MemoryStorage, SmtStorage, StorageError, StorageUpdateParts, + StorageUpdates, Subtree, SubtreeError, SubtreeUpdate, }; #[cfg(feature = "rocksdb")] pub use large::{RocksDbConfig, RocksDbStorage}; From e0728398faf019db4ab96a67ec06d84dde2966ee Mon Sep 17 00:00:00 2001 From: Bernhard Schuster Date: Tue, 24 Feb 2026 21:08:18 +0100 Subject: [PATCH 2/4] drop rocksdb large smt backend --- .config/nextest.toml | 4 +- .github/workflows/lint.yml | 10 - Cargo.lock | 138 -- Makefile | 14 +- miden-crypto/Cargo.toml | 11 - miden-crypto/benches/large-smt.rs | 155 -- miden-crypto/src/main.rs | 85 +- .../src/merkle/smt/large/batch_ops.rs | 4 +- miden-crypto/src/merkle/smt/large/mod.rs | 111 +- .../src/merkle/smt/large/storage/mod.rs | 5 - .../src/merkle/smt/large/storage/rocksdb.rs | 1314 ----------------- .../src/merkle/smt/large/subtree/mod.rs | 2 +- miden-crypto/src/merkle/store/mod.rs | 2 +- miden-crypto/tests/rocksdb_large_smt.rs | 147 -- 14 files changed, 15 insertions(+), 1987 deletions(-) delete mode 100644 miden-crypto/benches/large-smt.rs delete mode 100644 miden-crypto/src/merkle/smt/large/storage/rocksdb.rs delete mode 100644 miden-crypto/tests/rocksdb_large_smt.rs diff --git 
a/.config/nextest.toml b/.config/nextest.toml index 9268e014ec..e7ca0a8686 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -1,5 +1,5 @@ [profile.default] -default-filter = 'not (test(merkle::smt::full::concurrent) or test(merkle::smt::full::large) or binary(rocksdb_large_smt))' +default-filter = 'not (test(merkle::smt::full::concurrent) or test(merkle::smt::full::large))' fail-fast = false failure-output = "immediate-final" @@ -9,6 +9,6 @@ fail-fast = false failure-output = "immediate-final" [profile.large-smt] -default-filter = '(test(merkle::smt::full::large) or binary(rocksdb_large_smt))' +default-filter = 'test(merkle::smt::full::large)' fail-fast = false failure-output = "immediate-final" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 6fb6141955..032c7f5075 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -42,11 +42,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - # Added: LLVM/Clang for RocksDB/bindgen - - name: Install LLVM/Clang - uses: KyleMayes/install-llvm-action@v2 - with: - version: "17" - name: Rustup run: | rustup update --no-self-update @@ -83,11 +78,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - # Added: LLVM/Clang for RocksDB/bindgen - - name: Install LLVM/Clang - uses: ./.github/actions/install-llvm - with: - version: "17" - name: Rustup run: rustup update --no-self-update - uses: Swatinem/rust-cache@v2 diff --git a/Cargo.lock b/Cargo.lock index af41e56842..7c26e7be25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,24 +119,6 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" -[[package]] -name = "bindgen" -version = "0.72.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" -dependencies = [ - "bitflags", - "cexpr", - 
"clang-sys", - "itertools 0.13.0", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn", -] - [[package]] name = "bitflags" version = "2.10.0" @@ -171,16 +153,6 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" -[[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" -dependencies = [ - "cc", - "pkg-config", -] - [[package]] name = "cast" version = "0.3.0" @@ -199,15 +171,6 @@ dependencies = [ "shlex", ] -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfg-if" version = "1.0.4" @@ -276,17 +239,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "clap" version = "4.5.53" @@ -871,47 +823,12 @@ version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" -[[package]] -name = "libloading" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" -dependencies = [ - "cfg-if", - "windows-link", -] - [[package]] name = "libm" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" -[[package]] -name = "librocksdb-sys" -version = 
"0.17.3+10.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef2a00ee60fe526157c9023edab23943fae1ce2ab6f4abb2a807c1746835de9" -dependencies = [ - "bindgen", - "bzip2-sys", - "cc", - "libc", - "libz-sys", - "lz4-sys", -] - -[[package]] -name = "libz-sys" -version = "1.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -927,16 +844,6 @@ dependencies = [ "scopeguard", ] -[[package]] -name = "lz4-sys" -version = "1.11.1+lz4-1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "memchr" version = "2.7.6" @@ -971,7 +878,6 @@ dependencies = [ "rand_core 0.9.3", "rand_hc", "rayon", - "rocksdb", "rstest", "seq-macro", "serde", @@ -995,12 +901,6 @@ dependencies = [ "syn", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "nanorand" version = "0.7.0" @@ -1010,16 +910,6 @@ dependencies = [ "getrandom 0.2.16", ] -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "num" version = "0.4.3" @@ -1140,12 +1030,6 @@ dependencies = [ "spki", ] -[[package]] -name = "pkg-config" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" - [[package]] name = "plotters" version = "0.3.7" @@ -1362,16 
+1246,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "rocksdb" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddb7af00d2b17dbd07d82c0063e25411959748ff03e8d4f96134c2ff41fce34f" -dependencies = [ - "libc", - "librocksdb-sys", -] - [[package]] name = "rstest" version = "0.26.1" @@ -1401,12 +1275,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - [[package]] name = "rustc_version" version = "0.4.1" @@ -1717,12 +1585,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "version_check" version = "0.9.5" diff --git a/Makefile b/Makefile index 55d2835dcc..37f561be08 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ help: # -- variables -------------------------------------------------------------------------------------- -ALL_FEATURES_EXCEPT_ROCKSDB="concurrent executable hashmaps internal serde std" +ALL_FEATURES="concurrent executable hashmaps internal serde std" DEBUG_OVERFLOW_INFO=RUSTFLAGS="-C debug-assertions -C overflow-checks -C debuginfo=2" WARNINGS=RUSTDOCFLAGS="-D warnings" @@ -64,7 +64,7 @@ doc: ## Generate and check documentation .PHONY: test-default test-default: ## Run tests with default features - $(DEBUG_OVERFLOW_INFO) cargo nextest run --profile default --release --features ${ALL_FEATURES_EXCEPT_ROCKSDB} + $(DEBUG_OVERFLOW_INFO) cargo nextest run --profile default --release --features ${ALL_FEATURES} .PHONY: test-hashmaps test-hashmaps: ## Run tests with `hashmaps` feature enabled @@ 
-84,7 +84,7 @@ test-docs: .PHONY: test-large-smt test-large-smt: ## Run only large SMT tests - $(DEBUG_OVERFLOW_INFO) cargo nextest run --success-output immediate --profile large-smt --release --features hashmaps,rocksdb + $(DEBUG_OVERFLOW_INFO) cargo nextest run --success-output immediate --profile large-smt --release --features hashmaps .PHONY: test test: test-default test-hashmaps test-no-std test-docs test-large-smt ## Run all tests except concurrent SMT tests @@ -135,14 +135,6 @@ bench-smt-concurrent: ## Run SMT benchmarks with concurrent feature bench-large-smt-memory: ## Run large SMT benchmarks with memory storage cargo run --release --features concurrent,hashmaps,executable -- --size 1000000 -.PHONY: bench-large-smt-rocksdb -bench-large-smt-rocksdb: ## Run large SMT benchmarks with rocksdb storage - cargo run --release --features concurrent,hashmaps,rocksdb,executable -- --storage rocksdb --size 1000000 - -.PHONY: bench-large-smt-rocksdb-open -bench-large-smt-rocksdb-open: ## Run large SMT benchmarks with rocksdb storage and open existing database - cargo run --release --features concurrent,hashmaps,rocksdb,executable -- --storage rocksdb --open - # --- fuzzing -------------------------------------------------------------------------------- .PHONY: fuzz-smt diff --git a/miden-crypto/Cargo.toml b/miden-crypto/Cargo.toml index 5f731e80c0..9608498e19 100644 --- a/miden-crypto/Cargo.toml +++ b/miden-crypto/Cargo.toml @@ -36,11 +36,6 @@ required-features = ["internal"] harness = false name = "merkle" -[[bench]] -harness = false -name = "large-smt" -required-features = ["rocksdb"] - [[bench]] harness = false name = "store" @@ -64,7 +59,6 @@ executable = ["dep:clap", "dep:rand-utils", "std"] fuzzing = [] hashmaps = ["dep:hashbrown"] internal = [] -rocksdb = ["concurrent", "dep:rocksdb"] serde = ["dep:serde", "serde?/alloc", "winter-math/serde"] std = [ "blake3/std", @@ -95,7 +89,6 @@ rand_chacha = { default-features = false, version = "0.9" } rand_core = { 
default-features = false, version = "0.9" } rand_hc = { version = "0.3" } rayon = { optional = true, version = "1.10" } -rocksdb = { default-features = false, features = ["bindgen-runtime", "lz4"], optional = true, version = "0.24" } serde = { default-features = false, features = ["derive"], optional = true, version = "1.0" } sha2 = { default-features = false, version = "0.10" } sha3 = { default-features = false, version = "0.10" } @@ -127,7 +120,3 @@ workspace = true [package.metadata.cargo-machete] ignored = ["getrandom"] - -[[test]] -name = "rocksdb_large_smt" -required-features = ["concurrent", "rocksdb"] diff --git a/miden-crypto/benches/large-smt.rs b/miden-crypto/benches/large-smt.rs deleted file mode 100644 index 6d1daf0eaf..0000000000 --- a/miden-crypto/benches/large-smt.rs +++ /dev/null @@ -1,155 +0,0 @@ -use std::hint; - -use criterion::{Criterion, criterion_group, criterion_main}; -use miden_crypto::{ - Word, - merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, -}; - -mod common; -use common::*; - -use crate::{ - common::data::{generate_smt_entries_sequential, generate_test_keys_sequential}, - config::{DEFAULT_MEASUREMENT_TIME, DEFAULT_SAMPLE_SIZE}, -}; - -benchmark_with_setup_data! { - large_smt_open, - DEFAULT_MEASUREMENT_TIME, - DEFAULT_SAMPLE_SIZE, - "open", - || { - let entries = generate_smt_entries_sequential(256); - let keys = generate_test_keys_sequential(10); - let temp_dir = tempfile::TempDir::new().unwrap(); - let storage = RocksDbStorage::open(RocksDbConfig::new(temp_dir.path())).unwrap(); - let smt = LargeSmt::with_entries(storage, entries).unwrap(); - (smt, keys, temp_dir) - }, - |b: &mut criterion::Bencher, (smt, keys, _temp_dir): &(LargeSmt, Vec, tempfile::TempDir)| { - b.iter(|| { - for key in keys { - hint::black_box(smt.open(key)); - } - }) - }, -} - -benchmark_with_setup_data! 
{ - large_smt_compute_mutations, - DEFAULT_MEASUREMENT_TIME, - DEFAULT_SAMPLE_SIZE, - "compute_mutations", - || { - let entries = generate_smt_entries_sequential(256); - let temp_dir = tempfile::TempDir::new().unwrap(); - let storage = RocksDbStorage::open(RocksDbConfig::new(temp_dir.path())).unwrap(); - let smt = LargeSmt::with_entries(storage, entries).unwrap(); - let new_entries = generate_smt_entries_sequential(10_000); - (smt, new_entries, temp_dir) - }, - |b: &mut criterion::Bencher, (smt, new_entries, _temp_dir): &(LargeSmt, Vec<(Word, Word)>, tempfile::TempDir)| { - b.iter(|| { - hint::black_box(smt.compute_mutations(new_entries.clone()).unwrap()); - }) - }, -} - -// Benchmarks apply_mutations at different batch sizes. -// Setup: Creates fresh tree and computes mutations -// Measured: Only the apply_mutations call -// Tests: Performance scaling with mutation size (100, 1k, 10k entries) on a tree with 256 entries -benchmark_batch! { - large_smt_apply_mutations, - &[100, 1_000, 10_000], - |b: &mut criterion::Bencher, entry_count: usize| { - use criterion::BatchSize; - - let base_entries = generate_smt_entries_sequential(256); - let bench_dir = std::env::temp_dir().join("bench_apply_mutations"); - - b.iter_batched( - || { - std::fs::create_dir_all(&bench_dir).unwrap(); - let storage = RocksDbStorage::open(RocksDbConfig::new(&bench_dir)).unwrap(); - let smt = LargeSmt::with_entries(storage, base_entries.clone()).unwrap(); - let new_entries = generate_smt_entries_sequential(entry_count); - let mutations = smt.compute_mutations(new_entries).unwrap(); - - (smt, mutations, bench_dir.clone()) - }, - |(mut smt, mutations, bench_dir)| { - smt.apply_mutations(mutations).unwrap(); - drop(smt); - let _ = std::fs::remove_dir_all(&bench_dir); - }, - BatchSize::LargeInput, - ) - }, - |size| Some(criterion::Throughput::Elements(size as u64)) -} - -// Benchmarks apply_mutations_with_reversion at different batch sizes. 
-// Setup: Creates fresh tree and computes mutations -// Measured: Only the apply_mutations_with_reversion call -// Tests: Performance scaling with mutation size (100, 1k, 10k entries) on a tree with 256 entries -benchmark_batch! { - large_smt_apply_mutations_with_reversion, - &[100, 1_000, 10_000], - |b: &mut criterion::Bencher, entry_count: usize| { - use criterion::BatchSize; - - let base_entries = generate_smt_entries_sequential(256); - let bench_dir = std::env::temp_dir().join("bench_apply_mutations_with_reversion"); - - b.iter_batched( - || { - std::fs::create_dir_all(&bench_dir).unwrap(); - let storage = RocksDbStorage::open(RocksDbConfig::new(&bench_dir)).unwrap(); - let smt = LargeSmt::with_entries(storage, base_entries.clone()).unwrap(); - let new_entries = generate_smt_entries_sequential(entry_count); - let mutations = smt.compute_mutations(new_entries).unwrap(); - - (smt, mutations, bench_dir.clone()) - }, - |(mut smt, mutations, bench_dir)| { - let _ = smt.apply_mutations_with_reversion(mutations).unwrap(); - drop(smt); - let _ = std::fs::remove_dir_all(&bench_dir); - }, - BatchSize::LargeInput, - ) - }, - |size| Some(criterion::Throughput::Elements(size as u64)) -} - -benchmark_batch! 
{ - large_smt_insert_batch, - &[1, 16, 32, 64, 128], - |b: &mut criterion::Bencher, insert_count: usize| { - let base_entries = generate_smt_entries_sequential(256); - let temp_dir = tempfile::TempDir::new().unwrap(); - let storage = RocksDbStorage::open(RocksDbConfig::new(temp_dir.path())).unwrap(); - let mut smt = LargeSmt::with_entries(storage, base_entries).unwrap(); - - b.iter(|| { - for _ in 0..insert_count { - let new_entries = generate_smt_entries_sequential(10_000); - smt.insert_batch(new_entries).unwrap(); - } - }) - }, - |size| Some(criterion::Throughput::Elements(size as u64)) -} - -criterion_group!( - large_smt_benchmark_group, - large_smt_open, - large_smt_compute_mutations, - large_smt_apply_mutations, - large_smt_apply_mutations_with_reversion, - large_smt_insert_batch, -); - -criterion_main!(large_smt_benchmark_group); diff --git a/miden-crypto/src/main.rs b/miden-crypto/src/main.rs index a52d388f15..7496033896 100644 --- a/miden-crypto/src/main.rs +++ b/miden-crypto/src/main.rs @@ -1,8 +1,6 @@ -use std::{path::PathBuf, time::Instant}; +use std::time::Instant; -use clap::{Parser, ValueEnum}; -#[cfg(feature = "rocksdb")] -use miden_crypto::merkle::smt::{RocksDbConfig, RocksDbStorage}; +use clap::Parser; use miden_crypto::{ EMPTY_WORD, Felt, ONE, Word, hash::rpo::Rpo256, @@ -25,24 +23,9 @@ pub struct BenchmarkCmd { /// Number of updates #[arg(short = 'u', long = "updates", default_value = "10000")] updates: usize, - /// Path for the benchmark database - #[clap(short = 'p', long = "path")] - storage_path: Option, - /// Open existing database and skip construction - #[clap(short = 'o', long = "open", default_value = "false")] - open: bool, /// Number of batch operations #[clap(short = 'b', long = "batches", default_value = "1")] batches: usize, - /// Storage backend to use at runtime: memory or rocksdb - #[arg(short = 's', long = "storage", value_enum, default_value = "memory")] - storage: StorageKind, -} - -#[derive(Copy, Clone, Debug, Eq, PartialEq, 
ValueEnum)] -pub enum StorageKind { - Memory, - Rocksdb, } fn main() { @@ -56,16 +39,9 @@ pub fn benchmark_smt() { let tree_size = args.size; let insertions = args.insertions; let updates = args.updates; - let storage_path = args.storage_path; let batches = args.batches; - println!( - "Running benchmark with {} storage", - match args.storage { - StorageKind::Memory => "memory", - StorageKind::Rocksdb => "rocksdb", - } - ); + println!("Running benchmark with memory storage"); assert!(updates <= tree_size, "Cannot update more than `size`"); // prepare the `leaves` vector for tree creation let mut entries = Vec::new(); @@ -75,11 +51,7 @@ pub fn benchmark_smt() { entries.push((key, value)); } - let mut tree = if args.open { - open_existing(storage_path, args.storage).unwrap() - } else { - construction(entries.clone(), tree_size, storage_path, args.storage).unwrap() - }; + let mut tree = construction(entries.clone(), tree_size).unwrap(); insertion(&mut tree, insertions).unwrap(); for _ in 0..batches { batched_insertion(&mut tree, insertions).unwrap(); @@ -92,12 +64,10 @@ pub fn benchmark_smt() { pub fn construction( entries: Vec<(Word, Word)>, size: usize, - database_path: Option, - storage: StorageKind, ) -> Result, LargeSmtError> { println!("Running a construction benchmark:"); let now = Instant::now(); - let storage = get_storage(database_path, false, storage); + let storage: Storage = Box::new(MemoryStorage::new()); let tree = LargeSmt::with_entries(storage, entries)?; let elapsed = now.elapsed().as_secs_f32(); println!("Constructed an SMT with {size} key-value pairs in {elapsed:.1} seconds"); @@ -106,18 +76,6 @@ pub fn construction( Ok(tree) } -pub fn open_existing( - storage_path: Option, - storage: StorageKind, -) -> Result, LargeSmtError> { - println!("Opening an existing database:"); - let now = Instant::now(); - let storage = get_storage(storage_path, true, storage); - let tree = LargeSmt::new(storage)?; - let elapsed = now.elapsed().as_secs_f32(); - 
println!("Opened an existing database in {elapsed:.1} seconds"); - Ok(tree) -} /// Runs the insertion benchmark for the [`Smt`]. pub fn insertion(tree: &mut LargeSmt, insertions: usize) -> Result<(), LargeSmtError> { println!("Running an insertion benchmark:"); @@ -275,36 +233,3 @@ pub fn proof_generation(tree: &mut LargeSmt) -> Result<(), LargeSmtErro Ok(()) } - -#[allow(unused_variables)] -fn get_storage(database_path: Option, open: bool, kind: StorageKind) -> Storage { - match kind { - StorageKind::Memory => Box::new(MemoryStorage::new()), - StorageKind::Rocksdb => { - #[cfg(feature = "rocksdb")] - { - let path = database_path - .unwrap_or_else(|| std::env::temp_dir().join("miden_crypto_benchmark")); - println!("Using database path: {}", path.display()); - if !open { - // delete the folder if it exists as we are creating a new database - if path.exists() { - std::fs::remove_dir_all(path.clone()).unwrap(); - } - std::fs::create_dir_all(path.clone()) - .expect("Failed to create database directory"); - } - let db = RocksDbStorage::open( - RocksDbConfig::new(path).with_cache_size(1 << 30).with_max_open_files(2048), - ) - .expect("Failed to open database"); - Box::new(db) - } - #[cfg(not(feature = "rocksdb"))] - { - eprintln!("rocksdb feature not enabled; falling back to memory storage"); - Box::new(MemoryStorage::new()) - } - }, - } -} diff --git a/miden-crypto/src/merkle/smt/large/batch_ops.rs b/miden-crypto/src/merkle/smt/large/batch_ops.rs index 5428be3101..b6304844ee 100644 --- a/miden-crypto/src/merkle/smt/large/batch_ops.rs +++ b/miden-crypto/src/merkle/smt/large/batch_ops.rs @@ -308,11 +308,11 @@ impl LargeSmt { /// ```no_run /// use miden_crypto::{ /// EMPTY_WORD, Felt, Word, - /// merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, + /// merkle::smt::{LargeSmt, MemoryStorage}, /// }; /// /// # fn main() -> Result<(), Box> { - /// let storage = RocksDbStorage::open(RocksDbConfig::new("/path/to/db"))?; + /// let storage = MemoryStorage::default(); /// let 
mut smt = LargeSmt::new(storage)?; /// /// let entries = vec![ diff --git a/miden-crypto/src/merkle/smt/large/mod.rs b/miden-crypto/src/merkle/smt/large/mod.rs index c3420b39a2..306fc6af43 100644 --- a/miden-crypto/src/merkle/smt/large/mod.rs +++ b/miden-crypto/src/merkle/smt/large/mod.rs @@ -2,114 +2,7 @@ //! //! `LargeSmt` stores the top of the tree (depths 0–23) in memory and persists the lower //! depths (24–64) in storage as fixed-size subtrees. This hybrid layout scales beyond RAM -//! while keeping common operations fast. With the `rocksdb` feature enabled, the lower -//! subtrees and leaves are stored in RocksDB. On reopen, the in-memory top is reconstructed -//! from cached depth-24 subtree roots. -//! -//! Examples below require the `rocksdb` feature. -//! -//! Open an existing RocksDB-backed tree: -//! ```no_run -//! use miden_crypto::merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}; -//! -//! # fn main() -> Result<(), Box> { -//! let storage = RocksDbStorage::open(RocksDbConfig::new("/path/to/db"))?; -//! let smt = LargeSmt::new(storage)?; // reconstructs in-memory top if data exists -//! let _root = smt.root(); -//! # Ok(()) -//! # } -//! ``` -//! -//! Initialize an empty RocksDB-backed tree and bulk-load entries: -//! ```no_run -//! use miden_crypto::{ -//! Felt, Word, -//! merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, -//! }; -//! -//! # fn main() -> Result<(), Box> { -//! let path = "/path/to/new-db"; -//! if std::path::Path::new(path).exists() { -//! std::fs::remove_dir_all(path)?; -//! } -//! std::fs::create_dir_all(path)?; -//! -//! let storage = RocksDbStorage::open(RocksDbConfig::new(path))?; -//! let mut smt = LargeSmt::new(storage)?; // empty tree -//! -//! // Prepare initial entries -//! let entries = vec![ -//! ( -//! Word::new([Felt::new(1), Felt::new(0), Felt::new(0), Felt::new(0)]), -//! Word::new([Felt::new(10), Felt::new(20), Felt::new(30), Felt::new(40)]), -//! ), -//! ( -//! 
Word::new([Felt::new(2), Felt::new(0), Felt::new(0), Felt::new(0)]), -//! Word::new([Felt::new(11), Felt::new(22), Felt::new(33), Felt::new(44)]), -//! ), -//! ]; -//! -//! // Bulk insert entries (faster than compute_mutations + apply_mutations) -//! smt.insert_batch(entries)?; -//! # Ok(()) -//! # } -//! ``` -//! -//! Apply batch updates (insertions and deletions): -//! ```no_run -//! use miden_crypto::{ -//! EMPTY_WORD, Felt, Word, -//! merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, -//! }; -//! -//! # fn main() -> Result<(), Box> { -//! let storage = RocksDbStorage::open(RocksDbConfig::new("/path/to/db"))?; -//! let mut smt = LargeSmt::new(storage)?; -//! -//! let k1 = Word::new([Felt::new(101), Felt::new(0), Felt::new(0), Felt::new(0)]); -//! let v1 = Word::new([Felt::new(1), Felt::new(2), Felt::new(3), Felt::new(4)]); -//! let k2 = Word::new([Felt::new(202), Felt::new(0), Felt::new(0), Felt::new(0)]); -//! let k3 = Word::new([Felt::new(303), Felt::new(0), Felt::new(0), Felt::new(0)]); -//! let v3 = Word::new([Felt::new(7), Felt::new(7), Felt::new(7), Felt::new(7)]); -//! -//! // EMPTY_WORD marks deletions -//! let updates = vec![(k1, v1), (k2, EMPTY_WORD), (k3, v3)]; -//! smt.insert_batch(updates)?; -//! # Ok(()) -//! # } -//! ``` -//! -//! Quick initialization with `with_entries` (best for modest datasets/tests): -//! ```no_run -//! use miden_crypto::{ -//! Felt, Word, -//! merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, -//! }; -//! -//! # fn main() -> Result<(), Box> { -//! // Note: `with_entries` expects an EMPTY storage and performs an all-at-once build. -//! // Prefer `insert_batch` for large bulk loads. -//! let path = "/path/to/new-db"; -//! if std::path::Path::new(path).exists() { -//! std::fs::remove_dir_all(path)?; -//! } -//! std::fs::create_dir_all(path)?; -//! -//! let storage = RocksDbStorage::open(RocksDbConfig::new(path))?; -//! let entries = vec![ -//! ( -//! 
Word::new([Felt::new(1), Felt::new(0), Felt::new(0), Felt::new(0)]), -//! Word::new([Felt::new(10), Felt::new(20), Felt::new(30), Felt::new(40)]), -//! ), -//! ( -//! Word::new([Felt::new(2), Felt::new(0), Felt::new(0), Felt::new(0)]), -//! Word::new([Felt::new(11), Felt::new(22), Felt::new(33), Felt::new(44)]), -//! ), -//! ]; -//! let _smt = LargeSmt::with_entries(storage, entries)?; -//! # Ok(()) -//! # } -//! ``` +//! while keeping common operations fast. //! //! ## Performance and Memory Considerations //! @@ -164,8 +57,6 @@ mod storage; pub use storage::{ MemoryStorage, SmtStorage, StorageError, StorageUpdateParts, StorageUpdates, SubtreeUpdate, }; -#[cfg(feature = "rocksdb")] -pub use storage::{RocksDbConfig, RocksDbStorage}; mod iter; pub use iter::LargeSmtInnerNodeIterator; diff --git a/miden-crypto/src/merkle/smt/large/storage/mod.rs b/miden-crypto/src/merkle/smt/large/storage/mod.rs index a15530c65d..94326ea9cb 100644 --- a/miden-crypto/src/merkle/smt/large/storage/mod.rs +++ b/miden-crypto/src/merkle/smt/large/storage/mod.rs @@ -12,11 +12,6 @@ use crate::{ mod error; pub use error::StorageError; -#[cfg(feature = "rocksdb")] -mod rocksdb; -#[cfg(feature = "rocksdb")] -pub use rocksdb::{RocksDbConfig, RocksDbStorage}; - mod memory; pub use memory::MemoryStorage; diff --git a/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs b/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs deleted file mode 100644 index f5271013de..0000000000 --- a/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs +++ /dev/null @@ -1,1314 +0,0 @@ -use alloc::{boxed::Box, vec::Vec}; -use std::{path::PathBuf, sync::Arc}; - -use rocksdb::{ - BlockBasedOptions, Cache, ColumnFamilyDescriptor, DB, DBCompactionStyle, DBCompressionType, - DBIteratorWithThreadMode, FlushOptions, IteratorMode, Options, ReadOptions, WriteBatch, -}; -use winter_utils::{Deserializable, Serializable}; - -use super::{SmtStorage, StorageError, StorageUpdateParts, StorageUpdates, SubtreeUpdate}; -use crate::{ - 
EMPTY_WORD, Word, - merkle::{ - NodeIndex, - smt::{ - InnerNode, Map, SmtLeaf, - large::{IN_MEMORY_DEPTH, LargeSmt, subtree::Subtree}, - }, - }, -}; - -/// The name of the RocksDB column family used for storing SMT leaves. -const LEAVES_CF: &str = "leaves"; -/// The names of the RocksDB column families used for storing SMT subtrees (deep nodes). -const SUBTREE_24_CF: &str = "st24"; -const SUBTREE_32_CF: &str = "st32"; -const SUBTREE_40_CF: &str = "st40"; -const SUBTREE_48_CF: &str = "st48"; -const SUBTREE_56_CF: &str = "st56"; - -/// The name of the RocksDB column family used for storing metadata (e.g., root, counts). -const METADATA_CF: &str = "metadata"; -/// The name of the RocksDB column family used for storing level 24 hashes for fast tree rebuilding. -const DEPTH_24_CF: &str = "depth24"; - -/// The key used in the `METADATA_CF` column family to store the SMT's root hash. -const ROOT_KEY: &[u8] = b"smt_root"; -/// The key used in the `METADATA_CF` column family to store the total count of non-empty leaves. -const LEAF_COUNT_KEY: &[u8] = b"leaf_count"; -/// The key used in the `METADATA_CF` column family to store the total count of key-value entries. -const ENTRY_COUNT_KEY: &[u8] = b"entry_count"; - -/// A RocksDB-backed persistent storage implementation for a Sparse Merkle Tree (SMT). -/// -/// Implements the `SmtStorage` trait, providing durable storage for SMT components -/// including leaves, subtrees (for deeper parts of the tree), and metadata like the SMT root -/// and counts. It leverages RocksDB column families to organize data: -/// - `LEAVES_CF` ("leaves"): Stores `SmtLeaf` data, keyed by their logical u64 index. -/// - `SUBTREE_24_CF` ("st24"): Stores serialized `Subtree` data at depth 24, keyed by their root -/// `NodeIndex`. -/// - `SUBTREE_32_CF` ("st32"): Stores serialized `Subtree` data at depth 32, keyed by their root -/// `NodeIndex`. 
-/// - `SUBTREE_40_CF` ("st40"): Stores serialized `Subtree` data at depth 40, keyed by their root -/// `NodeIndex`. -/// - `SUBTREE_48_CF` ("st48"): Stores serialized `Subtree` data at depth 48, keyed by their root -/// `NodeIndex`. -/// - `SUBTREE_56_CF` ("st56"): Stores serialized `Subtree` data at depth 56, keyed by their root -/// `NodeIndex`. -/// - `METADATA_CF` ("metadata"): Stores overall SMT metadata such as the current root hash, total -/// leaf count, and total entry count. -#[derive(Debug, Clone)] -pub struct RocksDbStorage { - db: Arc, -} - -impl RocksDbStorage { - /// Opens or creates a RocksDB database at the specified `path` and configures it for SMT - /// storage. - /// - /// This method sets up the necessary column families (`leaves`, `subtrees`, `metadata`) - /// and applies various RocksDB options for performance, such as caching, bloom filters, - /// and compaction strategies tailored for SMT workloads. - /// - /// # Errors - /// Returns `StorageError::Backend` if the database cannot be opened or configured, - /// for example, due to path issues, permissions, or RocksDB internal errors. 
- pub fn open(config: RocksDbConfig) -> Result { - // Base DB options - let mut db_opts = Options::default(); - // Create DB if it doesn't exist - db_opts.create_if_missing(true); - // Auto-create missing column families - db_opts.create_missing_column_families(true); - // Tune compaction threads to match CPU cores - db_opts.increase_parallelism(rayon::current_num_threads() as i32); - // Limit the number of open file handles - db_opts.set_max_open_files(config.max_open_files); - // Parallelize flush/compaction up to CPU count - db_opts.set_max_background_jobs(rayon::current_num_threads() as i32); - // Maximum WAL size - db_opts.set_max_total_wal_size(512 * 1024 * 1024); - - // Shared block cache across all column families - let cache = Cache::new_lru_cache(config.cache_size); - - // Common table options for bloom filtering and cache - let mut table_opts = BlockBasedOptions::default(); - // Use shared LRU cache for block data - table_opts.set_block_cache(&cache); - table_opts.set_bloom_filter(10.0, false); - // Enable whole-key bloom filtering (better with point lookups) - table_opts.set_whole_key_filtering(true); - // Pin L0 filter and index blocks in cache (improves performance) - table_opts.set_pin_l0_filter_and_index_blocks_in_cache(true); - - // Column family for leaves - let mut leaves_opts = Options::default(); - leaves_opts.set_block_based_table_factory(&table_opts); - // 128 MB memtable - leaves_opts.set_write_buffer_size(128 << 20); - // Allow up to 3 memtables - leaves_opts.set_max_write_buffer_number(3); - leaves_opts.set_min_write_buffer_number_to_merge(1); - // Do not retain flushed memtables in memory - leaves_opts.set_max_write_buffer_size_to_maintain(0); - // Use level-based compaction - leaves_opts.set_compaction_style(DBCompactionStyle::Level); - // 512 MB target file size - leaves_opts.set_target_file_size_base(512 << 20); - leaves_opts.set_target_file_size_multiplier(2); - // LZ4 compression - 
leaves_opts.set_compression_type(DBCompressionType::Lz4); - // Set level-based compaction parameters - leaves_opts.set_level_zero_file_num_compaction_trigger(8); - - // Helper to build subtree CF options with correct prefix length - fn subtree_cf(cache: &Cache, bloom_filter_bits: f64) -> Options { - let mut tbl = BlockBasedOptions::default(); - // Use shared LRU cache for block data - tbl.set_block_cache(cache); - // Set bloom filter for subtree lookups - tbl.set_bloom_filter(bloom_filter_bits, false); - // Enable whole-key bloom filtering - tbl.set_whole_key_filtering(true); - // Pin L0 filter and index blocks in cache - tbl.set_pin_l0_filter_and_index_blocks_in_cache(true); - - let mut opts = Options::default(); - opts.set_block_based_table_factory(&tbl); - // 128 MB memtable - opts.set_write_buffer_size(128 << 20); - opts.set_max_write_buffer_number(3); - opts.set_min_write_buffer_number_to_merge(1); - // Do not retain flushed memtables in memory - opts.set_max_write_buffer_size_to_maintain(0); - // Use level-based compaction - opts.set_compaction_style(DBCompactionStyle::Level); - // Trigger compaction at 4 L0 files - opts.set_level_zero_file_num_compaction_trigger(4); - // 512 MB target file size - opts.set_target_file_size_base(512 << 20); - opts.set_target_file_size_multiplier(2); - // LZ4 compression - opts.set_compression_type(DBCompressionType::Lz4); - // Set level-based compaction parameters - opts.set_level_zero_file_num_compaction_trigger(8); - opts - } - - let mut depth24_opts = Options::default(); - depth24_opts.set_compression_type(DBCompressionType::Lz4); - depth24_opts.set_block_based_table_factory(&table_opts); - - // Metadata CF with no compression - let mut metadata_opts = Options::default(); - metadata_opts.set_compression_type(DBCompressionType::None); - - // Define column families with tailored options - let cfs = vec![ - ColumnFamilyDescriptor::new(LEAVES_CF, leaves_opts), - ColumnFamilyDescriptor::new(SUBTREE_24_CF, subtree_cf(&cache, 
8.0)), - ColumnFamilyDescriptor::new(SUBTREE_32_CF, subtree_cf(&cache, 10.0)), - ColumnFamilyDescriptor::new(SUBTREE_40_CF, subtree_cf(&cache, 10.0)), - ColumnFamilyDescriptor::new(SUBTREE_48_CF, subtree_cf(&cache, 12.0)), - ColumnFamilyDescriptor::new(SUBTREE_56_CF, subtree_cf(&cache, 12.0)), - ColumnFamilyDescriptor::new(METADATA_CF, metadata_opts), - ColumnFamilyDescriptor::new(DEPTH_24_CF, depth24_opts), - ]; - - // Open the database with our tuned CFs - let db = DB::open_cf_descriptors(&db_opts, config.path, cfs)?; - - Ok(Self { db: Arc::new(db) }) - } - - /// Syncs the RocksDB database to disk. - /// - /// This ensures that all data is persisted to disk. - /// - /// # Errors - /// - Returns `StorageError::Backend` if the flush operation fails. - fn sync(&self) -> Result<(), StorageError> { - let mut fopts = FlushOptions::default(); - fopts.set_wait(true); - - for name in [ - LEAVES_CF, - SUBTREE_24_CF, - SUBTREE_32_CF, - SUBTREE_40_CF, - SUBTREE_48_CF, - SUBTREE_56_CF, - METADATA_CF, - DEPTH_24_CF, - ] { - let cf = self.cf_handle(name)?; - self.db.flush_cf_opt(cf, &fopts)?; - } - - self.db.flush_wal(true)?; - Ok(()) - } - - /// Converts an index (u64) into a fixed-size byte array for use as a RocksDB key. - #[inline(always)] - fn index_db_key(index: u64) -> [u8; 8] { - index.to_be_bytes() - } - - /// Converts a `NodeIndex` (for a subtree root) into a `KeyBytes` for use as a RocksDB key. - /// The `KeyBytes` is a wrapper around a 8-byte value with a variable-length prefix. - #[inline(always)] - fn subtree_db_key(index: NodeIndex) -> KeyBytes { - let keep = match index.depth() { - 24 => 3, - 32 => 4, - 40 => 5, - 48 => 6, - 56 => 7, - d => panic!("unsupported depth {d}"), - }; - KeyBytes::new(index.value(), keep) - } - - /// Retrieves a handle to a RocksDB column family by its name. - /// - /// # Errors - /// Returns `StorageError::Backend` if the column family with the given `name` does not - /// exist. 
- fn cf_handle(&self, name: &str) -> Result<&rocksdb::ColumnFamily, StorageError> { - self.db - .cf_handle(name) - .ok_or_else(|| StorageError::Unsupported(format!("unknown column family `{name}`"))) - } - - /* helper: CF handle from NodeIndex ------------------------------------- */ - #[inline(always)] - fn subtree_cf(&self, index: NodeIndex) -> &rocksdb::ColumnFamily { - let name = cf_for_depth(index.depth()); - self.cf_handle(name).expect("CF handle missing") - } -} - -impl SmtStorage for RocksDbStorage { - /// Retrieves the SMT root hash from the `METADATA_CF` column family. - /// - /// # Errors - /// - `StorageError::Backend`: If the metadata column family is missing or a RocksDB error - /// occurs. - /// - `StorageError::DeserializationError`: If the retrieved root hash bytes cannot be - /// deserialized. - fn get_root(&self) -> Result, StorageError> { - let cf = self.cf_handle(METADATA_CF)?; - match self.db.get_cf(cf, ROOT_KEY)? { - Some(bytes) => { - let digest = Word::read_from_bytes(&bytes)?; - Ok(Some(digest)) - }, - None => Ok(None), - } - } - - /// Stores the SMT root hash in the `METADATA_CF` column family. - /// - /// # Errors - /// - `StorageError::Backend`: If the metadata column family is missing or a RocksDB error - /// occurs. - fn set_root(&self, root: Word) -> Result<(), StorageError> { - let cf = self.cf_handle(METADATA_CF)?; - self.db.put_cf(cf, ROOT_KEY, root.to_bytes())?; - Ok(()) - } - - /// Retrieves the total count of non-empty leaves from the `METADATA_CF` column family. - /// Returns 0 if the count is not found. - /// - /// # Errors - /// - `StorageError::Backend`: If the metadata column family is missing or a RocksDB error - /// occurs. - /// - `StorageError::BadValueLen`: If the retrieved count bytes are invalid. 
- fn leaf_count(&self) -> Result { - let cf = self.cf_handle(METADATA_CF)?; - self.db.get_cf(cf, LEAF_COUNT_KEY)?.map_or(Ok(0), |bytes| { - let arr: [u8; 8] = - bytes.as_slice().try_into().map_err(|_| StorageError::BadValueLen { - what: "leaf count", - expected: 8, - found: bytes.len(), - })?; - Ok(usize::from_be_bytes(arr)) - }) - } - - /// Retrieves the total count of key-value entries from the `METADATA_CF` column family. - /// Returns 0 if the count is not found. - /// - /// # Errors - /// - `StorageError::Backend`: If the metadata column family is missing or a RocksDB error - /// occurs. - /// - `StorageError::BadValueLen`: If the retrieved count bytes are invalid. - fn entry_count(&self) -> Result { - let cf = self.cf_handle(METADATA_CF)?; - self.db.get_cf(cf, ENTRY_COUNT_KEY)?.map_or(Ok(0), |bytes| { - let arr: [u8; 8] = - bytes.as_slice().try_into().map_err(|_| StorageError::BadValueLen { - what: "entry count", - expected: 8, - found: bytes.len(), - })?; - Ok(usize::from_be_bytes(arr)) - }) - } - - /// Inserts a key-value pair into the SMT leaf at the specified logical `index`. - /// - /// This operation involves: - /// 1. Retrieving the current leaf (if any) at `index`. - /// 2. Inserting the new key-value pair into the leaf. - /// 3. Updating the leaf and entry counts in the metadata column family. - /// 4. Writing all changes (leaf data, counts) to RocksDB in a single batch. - /// - /// Note: This only updates the leaf. Callers are responsible for recomputing and - /// persisting the corresponding inner nodes. - /// - /// # Errors - /// - `StorageError::Backend`: If column families are missing or a RocksDB error occurs. - /// - `StorageError::DeserializationError`: If existing leaf data is corrupt. - fn insert_value( - &self, - index: u64, - key: Word, - value: Word, - ) -> Result, StorageError> { - debug_assert_ne!(value, EMPTY_WORD); - - let mut batch = WriteBatch::default(); - - // Fetch initial counts. 
- let mut current_leaf_count = self.leaf_count()?; - let mut current_entry_count = self.entry_count()?; - - let leaves_cf = self.cf_handle(LEAVES_CF)?; - let db_key = Self::index_db_key(index); - - let maybe_leaf = self.get_leaf(index)?; - - let value_to_return: Option = match maybe_leaf { - Some(mut existing_leaf) => { - let old_value = existing_leaf.insert(key, value).expect("Failed to insert value"); - // Determine if the overall SMT entry_count needs to change. - // entry_count increases if: - // 1. The key was not present in this leaf before (`old_value` is `None`). - // 2. The key was present but held `EMPTY_WORD` (`old_value` is - // `Some(EMPTY_WORD)`). - if old_value.is_none_or(|old_v| old_v == EMPTY_WORD) { - current_entry_count += 1; - } - // current_leaf_count does not change because the leaf itself already existed. - batch.put_cf(leaves_cf, db_key, existing_leaf.to_bytes()); - old_value - }, - None => { - // Leaf at `index` does not exist, so create a new one. - let new_leaf = SmtLeaf::Single((key, value)); - // A new leaf is created. - current_leaf_count += 1; - // This new leaf contains one new SMT entry. - current_entry_count += 1; - batch.put_cf(leaves_cf, db_key, new_leaf.to_bytes()); - // No previous value, as the leaf (and thus the key in it) was new. - None - }, - }; - - // Add updated metadata counts to the batch. - let metadata_cf = self.cf_handle(METADATA_CF)?; - batch.put_cf(metadata_cf, LEAF_COUNT_KEY, current_leaf_count.to_be_bytes()); - batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, current_entry_count.to_be_bytes()); - - // Atomically write all changes (leaf data and metadata counts). - self.db.write(batch)?; - - Ok(value_to_return) - } - - /// Removes a key-value pair from the SMT leaf at the specified logical `index`. - /// - /// This operation involves: - /// 1. Retrieving the leaf at `index`. - /// 2. Removing the `key` from the leaf. If the leaf becomes empty, it's deleted from RocksDB. - /// 3. 
Updating the leaf and entry counts in the metadata column family. - /// 4. Writing all changes (leaf data/deletion, counts) to RocksDB in a single batch. - /// - /// Returns `Ok(None)` if the leaf at `index` does not exist or the `key` is not found. - /// - /// Note: This only updates the leaf. Callers are responsible for recomputing and - /// persisting the corresponding inner nodes. - /// - /// # Errors - /// - `StorageError::Backend`: If column families are missing or a RocksDB error occurs. - /// - `StorageError::DeserializationError`: If existing leaf data is corrupt. - fn remove_value(&self, index: u64, key: Word) -> Result, StorageError> { - let Some(mut leaf) = self.get_leaf(index)? else { - return Ok(None); - }; - - let mut batch = WriteBatch::default(); - let cf = self.cf_handle(LEAVES_CF)?; - let metadata_cf = self.cf_handle(METADATA_CF)?; - let db_key = Self::index_db_key(index); - let mut entry_count = self.entry_count()?; - let mut leaf_count = self.leaf_count()?; - - let (current_value, is_empty) = leaf.remove(key); - if let Some(current_value) = current_value - && current_value != EMPTY_WORD - { - entry_count -= 1; - } - if is_empty { - leaf_count -= 1; - batch.delete_cf(cf, db_key); - } else { - batch.put_cf(cf, db_key, leaf.to_bytes()); - } - batch.put_cf(metadata_cf, LEAF_COUNT_KEY, leaf_count.to_be_bytes()); - batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, entry_count.to_be_bytes()); - self.db.write(batch)?; - Ok(current_value) - } - - /// Retrieves a single SMT leaf node by its logical `index` from the `LEAVES_CF` column family. - /// - /// # Errors - /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. - /// - `StorageError::DeserializationError`: If the retrieved leaf data is corrupt. - fn get_leaf(&self, index: u64) -> Result, StorageError> { - let cf = self.cf_handle(LEAVES_CF)?; - let key = Self::index_db_key(index); - match self.db.get_cf(cf, key)? 
{ - Some(bytes) => { - let leaf = SmtLeaf::read_from_bytes(&bytes)?; - Ok(Some(leaf)) - }, - None => Ok(None), - } - } - - /// Sets or updates multiple SMT leaf nodes in the `LEAVES_CF` column family. - /// - /// This method performs a batch write to RocksDB. It also updates the global - /// leaf and entry counts in the `METADATA_CF` based on the provided `leaves` map, - /// overwriting any previous counts. - /// - /// Note: This method assumes the provided `leaves` map represents the entirety - /// of leaves to be stored or that counts are being explicitly reset. - /// Note: This only updates the leaves. Callers are responsible for recomputing and - /// persisting the corresponding inner nodes. - /// - /// # Errors - /// - `StorageError::Backend`: If column families are missing or a RocksDB error occurs. - fn set_leaves(&self, leaves: Map) -> Result<(), StorageError> { - let cf = self.cf_handle(LEAVES_CF)?; - let leaf_count: usize = leaves.len(); - let entry_count: usize = leaves.values().map(|leaf| leaf.entries().len()).sum(); - let mut batch = WriteBatch::default(); - for (idx, leaf) in leaves { - let key = Self::index_db_key(idx); - let value = leaf.to_bytes(); - batch.put_cf(cf, key, &value); - } - let metadata_cf = self.cf_handle(METADATA_CF)?; - batch.put_cf(metadata_cf, LEAF_COUNT_KEY, leaf_count.to_be_bytes()); - batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, entry_count.to_be_bytes()); - self.db.write(batch)?; - Ok(()) - } - - /// Removes a single SMT leaf node by its logical `index` from the `LEAVES_CF` column family. - /// - /// Important: This method currently *does not* update the global leaf and entry counts - /// in the metadata. Callers are responsible for managing these counts separately - /// if using this method directly, or preferably use `apply` or `remove_value` which handle - /// counts. - /// - /// Note: This only removes the leaf. Callers are responsible for recomputing and - /// persisting the corresponding inner nodes. 
- /// - /// # Errors - /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. - /// - `StorageError::DeserializationError`: If the retrieved (to be returned) leaf data is - /// corrupt. - fn remove_leaf(&self, index: u64) -> Result, StorageError> { - let key = Self::index_db_key(index); - let cf = self.cf_handle(LEAVES_CF)?; - let old_bytes = self.db.get_cf(cf, key)?; - self.db.delete_cf(cf, key)?; - Ok(old_bytes - .map(|bytes| SmtLeaf::read_from_bytes(&bytes).expect("failed to deserialize leaf"))) - } - - /// Retrieves multiple SMT leaf nodes by their logical `indices` using RocksDB's `multi_get_cf`. - /// - /// # Errors - /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. - /// - `StorageError::DeserializationError`: If any retrieved leaf data is corrupt. - fn get_leaves(&self, indices: &[u64]) -> Result>, StorageError> { - let cf = self.cf_handle(LEAVES_CF)?; - let db_keys: Vec<[u8; 8]> = indices.iter().map(|&idx| Self::index_db_key(idx)).collect(); - let results = self.db.multi_get_cf(db_keys.iter().map(|k| (cf, k.as_ref()))); - - results - .into_iter() - .map(|result| match result { - Ok(Some(bytes)) => Ok(Some(SmtLeaf::read_from_bytes(&bytes)?)), - Ok(None) => Ok(None), - Err(e) => Err(e.into()), - }) - .collect() - } - - /// Returns true if the storage has any leaves. - /// - /// # Errors - /// Returns `StorageError` if the storage read operation fails. - fn has_leaves(&self) -> Result { - Ok(self.leaf_count()? > 0) - } - - /// Batch-retrieves multiple subtrees from RocksDB by their node indices. - /// - /// This method groups requests by subtree depth into column family buckets, - /// then performs parallel `multi_get` operations to efficiently retrieve - /// all subtrees. Results are deserialized and placed in the same order as - /// the input indices. - /// - /// Note: Retrieval is performed in parallel. 
If multiple errors occur (e.g., - /// deserialization or backend errors), only the first one encountered is returned. - /// Other errors will be discarded. - /// - /// # Parameters - /// - `indices`: A slice of subtree root indices to retrieve. - /// - /// # Returns - /// - A `Vec>` where each index corresponds to the original input. - /// - `Ok(...)` if all fetches succeed. - /// - `Err(StorageError)` if any RocksDB access or deserialization fails. - fn get_subtree(&self, index: NodeIndex) -> Result, StorageError> { - let cf = self.subtree_cf(index); - let key = Self::subtree_db_key(index); - match self.db.get_cf(cf, key)? { - Some(bytes) => { - let subtree = Subtree::from_vec(index, &bytes)?; - Ok(Some(subtree)) - }, - None => Ok(None), - } - } - - /// Batch-retrieves multiple subtrees from RocksDB by their node indices. - /// - /// This method groups requests by subtree depth into column family buckets, - /// then performs parallel `multi_get` operations to efficiently retrieve - /// all subtrees. Results are deserialized and placed in the same order as - /// the input indices. - /// - /// # Parameters - /// - `indices`: A slice of subtree root indices to retrieve. - /// - /// # Returns - /// - A `Vec>` where each index corresponds to the original input. - /// - `Ok(...)` if all fetches succeed. - /// - `Err(StorageError)` if any RocksDB access or deserialization fails. 
- fn get_subtrees(&self, indices: &[NodeIndex]) -> Result>, StorageError> { - use rayon::prelude::*; - - let mut depth_buckets: [Vec<(usize, NodeIndex)>; 5] = Default::default(); - - for (original_index, &node_index) in indices.iter().enumerate() { - let depth = node_index.depth(); - let bucket_index = match depth { - 56 => 0, - 48 => 1, - 40 => 2, - 32 => 3, - 24 => 4, - _ => { - return Err(StorageError::Unsupported(format!( - "unsupported subtree depth {depth}" - ))); - }, - }; - depth_buckets[bucket_index].push((original_index, node_index)); - } - let mut results = vec![None; indices.len()]; - - // Process depth buckets in parallel - let bucket_results: Result, StorageError> = depth_buckets - .into_par_iter() - .enumerate() - .filter(|(_, bucket)| !bucket.is_empty()) - .map( - |(bucket_index, bucket)| -> Result)>, StorageError> { - let depth = LargeSmt::::SUBTREE_DEPTHS[bucket_index]; - let cf = self.cf_handle(cf_for_depth(depth))?; - let keys: Vec<_> = - bucket.iter().map(|(_, idx)| Self::subtree_db_key(*idx)).collect(); - - let db_results = self.db.multi_get_cf(keys.iter().map(|k| (cf, k.as_ref()))); - - // Process results for this bucket - bucket - .into_iter() - .zip(db_results) - .map(|((original_index, node_index), db_result)| { - let subtree = match db_result { - Ok(Some(bytes)) => Some(Subtree::from_vec(node_index, &bytes)?), - Ok(None) => None, - Err(e) => return Err(e.into()), - }; - Ok((original_index, subtree)) - }) - .collect() - }, - ) - .collect(); - - // Flatten results and place them in correct positions - for bucket_result in bucket_results? { - for (original_index, subtree) in bucket_result { - results[original_index] = subtree; - } - } - - Ok(results) - } - - /// Stores a single subtree in RocksDB and optionally updates the depth-24 root cache. - /// - /// The subtree is serialized and written to its corresponding column family. 
- /// If it's a depth-24 subtree, the root node’s hash is also stored in the - /// dedicated `DEPTH_24_CF` cache to support top-level reconstruction. - /// - /// # Parameters - /// - `subtree`: A reference to the subtree to be stored. - /// - /// # Errors - /// - Returns `StorageError` if column family lookup, serialization, or the write operation - /// fails. - fn set_subtree(&self, subtree: &Subtree) -> Result<(), StorageError> { - let subtrees_cf = self.subtree_cf(subtree.root_index()); - let mut batch = WriteBatch::default(); - - let key = Self::subtree_db_key(subtree.root_index()); - let value = subtree.to_vec(); - batch.put_cf(subtrees_cf, key, value); - - // Also update level 24 hash cache if this is a level 24 subtree - if subtree.root_index().depth() == IN_MEMORY_DEPTH { - let root_hash = subtree - .get_inner_node(subtree.root_index()) - .ok_or_else(|| StorageError::Unsupported("Subtree root node not found".into()))? - .hash(); - - let depth24_cf = self.cf_handle(DEPTH_24_CF)?; - let hash_key = Self::index_db_key(subtree.root_index().value()); - batch.put_cf(depth24_cf, hash_key, root_hash.to_bytes()); - } - - self.db.write(batch)?; - Ok(()) - } - - /// Bulk-writes subtrees to storage (bypassing WAL). - /// - /// This method writes a vector of serialized `Subtree` objects directly to their - /// corresponding RocksDB column families based on their root index. - /// - /// ⚠️ **Warning:** This function should only be used during **initial SMT construction**. - /// It disables the WAL, meaning writes are **not crash-safe** and can result in data loss - /// if the process terminates unexpectedly. - /// - /// # Parameters - /// - `subtrees`: A vector of `Subtree` objects to be serialized and persisted. - /// - /// # Errors - /// - Returns `StorageError::Backend` if any column family lookup or RocksDB write fails. 
- fn set_subtrees(&self, subtrees: Vec) -> Result<(), StorageError> { - let depth24_cf = self.cf_handle(DEPTH_24_CF)?; - let mut batch = WriteBatch::default(); - - for subtree in subtrees { - let subtrees_cf = self.subtree_cf(subtree.root_index()); - let key = Self::subtree_db_key(subtree.root_index()); - let value = subtree.to_vec(); - batch.put_cf(subtrees_cf, key, value); - - if subtree.root_index().depth() == IN_MEMORY_DEPTH - && let Some(root_node) = subtree.get_inner_node(subtree.root_index()) - { - let hash_key = Self::index_db_key(subtree.root_index().value()); - batch.put_cf(depth24_cf, hash_key, root_node.hash().to_bytes()); - } - } - - self.db.write(batch)?; - Ok(()) - } - - /// Removes a single SMT Subtree from storage, identified by its root `NodeIndex`. - /// - /// # Errors - /// - `StorageError::Backend`: If the subtrees column family is missing or a RocksDB error - /// occurs. - fn remove_subtree(&self, index: NodeIndex) -> Result<(), StorageError> { - let subtrees_cf = self.subtree_cf(index); - let mut batch = WriteBatch::default(); - - let key = Self::subtree_db_key(index); - batch.delete_cf(subtrees_cf, key); - - // Also remove level 24 hash cache if this is a level 24 subtree - if index.depth() == IN_MEMORY_DEPTH { - let depth24_cf = self.cf_handle(DEPTH_24_CF)?; - let hash_key = Self::index_db_key(index.value()); - batch.delete_cf(depth24_cf, hash_key); - } - - self.db.write(batch)?; - Ok(()) - } - - /// Retrieves a single inner node (non-leaf node) from within a Subtree. - /// - /// This method is intended for accessing nodes at depths greater than or equal to - /// `IN_MEMORY_DEPTH`. It first finds the appropriate Subtree containing the `index`, then - /// delegates to `Subtree::get_inner_node()`. - /// - /// # Errors - /// - `StorageError::Backend`: If `index.depth() < IN_MEMORY_DEPTH`, or if RocksDB errors occur. - /// - `StorageError::Value`: If the containing Subtree data is corrupt. 
- fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError> { - if index.depth() < IN_MEMORY_DEPTH { - return Err(StorageError::Unsupported( - "Cannot get inner node from upper part of the tree".into(), - )); - } - let subtree_root_index = Subtree::find_subtree_root(index); - Ok(self - .get_subtree(subtree_root_index)? - .and_then(|subtree| subtree.get_inner_node(index))) - } - - /// Sets or updates a single inner node (non-leaf node) within a Subtree. - /// - /// This method is intended for `index.depth() >= IN_MEMORY_DEPTH`. - /// If the target Subtree does not exist, it is created. The `node` is then - /// inserted into the Subtree, and the modified Subtree is written back to storage. - /// - /// # Errors - /// - `StorageError::Backend`: If `index.depth() < IN_MEMORY_DEPTH`, or if RocksDB errors occur. - /// - `StorageError::Value`: If existing Subtree data is corrupt. - fn set_inner_node( - &self, - index: NodeIndex, - node: InnerNode, - ) -> Result, StorageError> { - if index.depth() < IN_MEMORY_DEPTH { - return Err(StorageError::Unsupported( - "Cannot set inner node in upper part of the tree".into(), - )); - } - - let subtree_root_index = Subtree::find_subtree_root(index); - let mut subtree = self - .get_subtree(subtree_root_index)? - .unwrap_or_else(|| Subtree::new(subtree_root_index)); - let old_node = subtree.insert_inner_node(index, node); - self.set_subtree(&subtree)?; - Ok(old_node) - } - - /// Removes a single inner node (non-leaf node) from within a Subtree. - /// - /// This method is intended for `index.depth() >= IN_MEMORY_DEPTH`. - /// If the Subtree becomes empty after removing the node, the Subtree itself - /// is removed from storage. - /// - /// # Errors - /// - `StorageError::Backend`: If `index.depth() < IN_MEMORY_DEPTH`, or if RocksDB errors occur. - /// - `StorageError::Value`: If existing Subtree data is corrupt. 
- fn remove_inner_node(&self, index: NodeIndex) -> Result, StorageError> { - if index.depth() < IN_MEMORY_DEPTH { - return Err(StorageError::Unsupported( - "Cannot remove inner node from upper part of the tree".into(), - )); - } - - let subtree_root_index = Subtree::find_subtree_root(index); - self.get_subtree(subtree_root_index) - .and_then(|maybe_subtree| match maybe_subtree { - Some(mut subtree) => { - let old_node = subtree.remove_inner_node(index); - let db_operation_result = if subtree.is_empty() { - self.remove_subtree(subtree_root_index) - } else { - self.set_subtree(&subtree) - }; - db_operation_result.map(|_| old_node) - }, - None => Ok(None), - }) - } - - /// Applies a batch of `StorageUpdates` atomically to the RocksDB backend. - /// - /// This is the primary method for persisting changes to the SMT. It constructs a single - /// RocksDB `WriteBatch` containing all specified changes: - /// - Leaf updates/deletions in `LEAVES_CF`. - /// - Subtree updates/deletions in `SUBTREE_24_CF`, `SUBTREE_32_CF`, `SUBTREE_40_CF`, - /// `SUBTREE_48_CF`, `SUBTREE_56_CF`. - /// - Updates to leaf and entry counts in `METADATA_CF` based on `leaf_count_delta` and - /// `entry_count_delta`. - /// - Sets the new SMT root in `METADATA_CF`. - /// - /// All operations in the batch are applied atomically by RocksDB. - /// - /// # Errors - /// - `StorageError::Backend`: If any column family is missing or a RocksDB write error occurs. 
- fn apply(&self, updates: StorageUpdates) -> Result<(), StorageError> { - use rayon::prelude::*; - - let mut batch = WriteBatch::default(); - - let leaves_cf = self.cf_handle(LEAVES_CF)?; - let metadata_cf = self.cf_handle(METADATA_CF)?; - let depth24_cf = self.cf_handle(DEPTH_24_CF)?; - - let StorageUpdateParts { - leaf_updates, - subtree_updates, - new_root, - leaf_count_delta, - entry_count_delta, - } = updates.into_parts(); - - // Process leaf updates - for (index, maybe_leaf) in leaf_updates { - let key = Self::index_db_key(index); - match maybe_leaf { - Some(leaf) => batch.put_cf(leaves_cf, key, leaf.to_bytes()), - None => batch.delete_cf(leaves_cf, key), - } - } - - // Helper for depth 24 operations - let is_depth_24 = |index: NodeIndex| index.depth() == IN_MEMORY_DEPTH; - - // Parallel preparation of subtree operations - let subtree_ops: Result, StorageError> = subtree_updates - .into_par_iter() - .map(|update| -> Result<_, StorageError> { - let (index, maybe_bytes, depth24_op) = match update { - SubtreeUpdate::Store { index, subtree } => { - let bytes = subtree.to_vec(); - let depth24_op = is_depth_24(index) - .then(|| subtree.get_inner_node(index)) - .flatten() - .map(|root_node| { - let hash_key = Self::index_db_key(index.value()); - (hash_key, Some(root_node.hash().to_bytes())) - }); - (index, Some(bytes), depth24_op) - }, - SubtreeUpdate::Delete { index } => { - let depth24_op = is_depth_24(index).then(|| { - let hash_key = Self::index_db_key(index.value()); - (hash_key, None) - }); - (index, None, depth24_op) - }, - }; - - let key = Self::subtree_db_key(index); - let subtrees_cf = self.subtree_cf(index); - - Ok((subtrees_cf, key, maybe_bytes, depth24_op)) - }) - .collect(); - - // Sequential batch building - for (subtrees_cf, key, maybe_bytes, depth24_op) in subtree_ops? 
{ - match maybe_bytes { - Some(bytes) => batch.put_cf(subtrees_cf, key, bytes), - None => batch.delete_cf(subtrees_cf, key), - } - - if let Some((hash_key, maybe_hash_bytes)) = depth24_op { - match maybe_hash_bytes { - Some(hash_bytes) => batch.put_cf(depth24_cf, hash_key, hash_bytes), - None => batch.delete_cf(depth24_cf, hash_key), - } - } - } - - if leaf_count_delta != 0 || entry_count_delta != 0 { - let current_leaf_count = self.leaf_count()?; - let current_entry_count = self.entry_count()?; - - let new_leaf_count = current_leaf_count.saturating_add_signed(leaf_count_delta); - let new_entry_count = current_entry_count.saturating_add_signed(entry_count_delta); - - batch.put_cf(metadata_cf, LEAF_COUNT_KEY, new_leaf_count.to_be_bytes()); - batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, new_entry_count.to_be_bytes()); - } - - batch.put_cf(metadata_cf, ROOT_KEY, new_root.to_bytes()); - - let mut write_opts = rocksdb::WriteOptions::default(); - // Disable immediate WAL sync to disk for better performance - write_opts.set_sync(false); - self.db.write_opt(batch, &write_opts)?; - - Ok(()) - } - - /// Returns an iterator over all (logical u64 index, `SmtLeaf`) pairs in the `LEAVES_CF`. - /// - /// The iterator uses a RocksDB snapshot for consistency and iterates in lexicographical - /// order of the keys (leaf indices). Errors during iteration (e.g., deserialization issues) - /// cause the iterator to skip the problematic item and attempt to continue. - /// - /// # Errors - /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs - /// during iterator creation. 
- fn iter_leaves(&self) -> Result + '_>, StorageError> { - let cf = self.cf_handle(LEAVES_CF)?; - let mut read_opts = ReadOptions::default(); - read_opts.set_total_order_seek(true); - let db_iter = self.db.iterator_cf_opt(cf, read_opts, IteratorMode::Start); - - Ok(Box::new(RocksDbDirectLeafIterator { iter: db_iter })) - } - - /// Returns an iterator over all `Subtree` instances across all subtree column families. - /// - /// The iterator uses a RocksDB snapshot and iterates in lexicographical order of keys - /// (subtree root NodeIndex) across all depth column families (24, 32, 40, 48, 56). - /// Errors during iteration (e.g., deserialization issues) cause the iterator to skip - /// the problematic item and attempt to continue. - /// - /// # Errors - /// - `StorageError::Backend`: If any subtree column family is missing or a RocksDB error occurs - /// during iterator creation. - fn iter_subtrees(&self) -> Result + '_>, StorageError> { - // All subtree column family names in order - const SUBTREE_CFS: [&str; 5] = - [SUBTREE_24_CF, SUBTREE_32_CF, SUBTREE_40_CF, SUBTREE_48_CF, SUBTREE_56_CF]; - - let mut cf_handles = Vec::new(); - for cf_name in SUBTREE_CFS { - cf_handles.push(self.cf_handle(cf_name)?); - } - - Ok(Box::new(RocksDbSubtreeIterator::new(&self.db, cf_handles))) - } - - /// Retrieves all depth 24 hashes for fast tree rebuilding. - /// - /// # Errors - /// - `StorageError::Backend`: If the depth24 column family is missing or a RocksDB error - /// occurs. - /// - `StorageError::Value`: If any hash bytes are corrupt. 
- fn get_depth24(&self) -> Result, StorageError> { - let cf = self.cf_handle(DEPTH_24_CF)?; - let iter = self.db.iterator_cf(cf, IteratorMode::Start); - let mut hashes = Vec::new(); - - for item in iter { - let (key_bytes, value_bytes) = item?; - - let index = index_from_key_bytes(&key_bytes)?; - let hash = Word::read_from_bytes(&value_bytes)?; - - hashes.push((index, hash)); - } - - Ok(hashes) - } -} - -/// Syncs the RocksDB database to disk before dropping the storage. -/// -/// This ensures that all data is persisted to disk before the storage is dropped. -/// -/// # Panics -/// - If the RocksDB sync operation fails. -impl Drop for RocksDbStorage { - fn drop(&mut self) { - if let Err(e) = self.sync() { - panic!("failed to flush RocksDB on drop: {e}"); - } - } -} - -// ITERATORS -// -------------------------------------------------------------------------------------------- - -/// An iterator over leaves directly from RocksDB. -/// -/// Wraps a `DBIteratorWithThreadMode` and handles deserialization of keys to `u64` (leaf index) -/// and values to `SmtLeaf`. Skips items that fail to deserialize or if a RocksDB error occurs -/// for an item, attempting to continue iteration. -struct RocksDbDirectLeafIterator<'a> { - iter: DBIteratorWithThreadMode<'a, DB>, -} - -impl Iterator for RocksDbDirectLeafIterator<'_> { - type Item = (u64, SmtLeaf); - - fn next(&mut self) -> Option { - self.iter.find_map(|result| { - let (key_bytes, value_bytes) = result.ok()?; - let leaf_idx = index_from_key_bytes(&key_bytes).ok()?; - let leaf = SmtLeaf::read_from_bytes(&value_bytes).ok()?; - Some((leaf_idx, leaf)) - }) - } -} - -/// An iterator over subtrees from multiple RocksDB column families. -/// -/// Iterates through all subtree column families (24, 32, 40, 48, 56) sequentially. -/// When one column family is exhausted, it moves to the next one. 
-struct RocksDbSubtreeIterator<'a> { - db: &'a DB, - cf_handles: Vec<&'a rocksdb::ColumnFamily>, - current_cf_index: usize, - current_iter: Option>, -} - -impl<'a> RocksDbSubtreeIterator<'a> { - fn new(db: &'a DB, cf_handles: Vec<&'a rocksdb::ColumnFamily>) -> Self { - let mut iterator = Self { - db, - cf_handles, - current_cf_index: 0, - current_iter: None, - }; - iterator.advance_to_next_cf(); - iterator - } - - fn advance_to_next_cf(&mut self) { - if self.current_cf_index < self.cf_handles.len() { - let cf = self.cf_handles[self.current_cf_index]; - let mut read_opts = ReadOptions::default(); - read_opts.set_total_order_seek(true); - self.current_iter = Some(self.db.iterator_cf_opt(cf, read_opts, IteratorMode::Start)); - } else { - self.current_iter = None; - } - } - - fn try_next_from_iter( - iter: &mut DBIteratorWithThreadMode, - cf_index: usize, - ) -> Option { - iter.find_map(|result| { - let (key_bytes, value_bytes) = result.ok()?; - let depth = 24 + (cf_index * 8) as u8; - - let node_idx = subtree_root_from_key_bytes(&key_bytes, depth).ok()?; - let value_vec = value_bytes.into_vec(); - Subtree::from_vec(node_idx, &value_vec).ok() - }) - } -} - -impl Iterator for RocksDbSubtreeIterator<'_> { - type Item = Subtree; - - fn next(&mut self) -> Option { - loop { - let iter = self.current_iter.as_mut()?; - - // Try to get the next valid subtree from current iterator - if let Some(subtree) = Self::try_next_from_iter(iter, self.current_cf_index) { - return Some(subtree); - } - - // Current CF exhausted, advance to next - self.current_cf_index += 1; - self.advance_to_next_cf(); - - // If no more CFs, we're done - self.current_iter.as_ref()?; - } - } -} - -// ROCKSDB CONFIGURATION -// -------------------------------------------------------------------------------------------- - -/// Configuration for RocksDB storage used by the Sparse Merkle Tree implementation. 
-/// -/// This struct contains the essential configuration parameters needed to initialize -/// and optimize RocksDB for SMT storage operations. It provides sensible defaults -/// while allowing customization for specific performance requirements. -#[derive(Debug, Clone)] -pub struct RocksDbConfig { - /// The filesystem path where the RocksDB database will be stored. - /// - /// This should be a directory path that the application has read/write permissions for. - /// The database will create multiple files in this directory to store data, logs, and - /// metadata. - pub(crate) path: PathBuf, - - /// The size of the RocksDB block cache in bytes. - /// - /// This cache stores frequently accessed data blocks in memory to improve read performance. - /// Larger cache sizes generally improve read performance but consume more memory. - /// Default: 1GB (1 << 30 bytes) - pub(crate) cache_size: usize, - - /// The maximum number of files that RocksDB can have open simultaneously. - /// - /// This setting affects both memory usage and the number of file descriptors used by the - /// process. Higher values may improve performance for databases with many SST files but - /// increase resource usage. Default: 512 files - pub(crate) max_open_files: i32, -} - -impl RocksDbConfig { - /// Creates a new RocksDbConfig with the given database path and default settings. - /// - /// # Arguments - /// * `path` - The filesystem path where the RocksDB database will be stored. This can be any - /// type that converts into a `PathBuf`. - /// - /// # Default Settings - /// * `cache_size`: 1GB (1,073,741,824 bytes) - /// * `max_open_files`: 512 - /// - /// # Examples - /// ``` - /// use miden_crypto::merkle::smt::RocksDbConfig; - /// - /// let config = RocksDbConfig::new("/path/to/database"); - /// ``` - pub fn new>(path: P) -> Self { - Self { - path: path.into(), - cache_size: 1 << 30, - max_open_files: 512, - } - } - - /// Sets the block cache size for RocksDB. 
- /// - /// The block cache stores frequently accessed data blocks in memory to improve read - /// performance. Larger cache sizes generally improve read performance but consume more - /// memory. - /// - /// # Arguments - /// * `size` - The cache size in bytes. - /// - /// # Examples - /// ``` - /// use miden_crypto::merkle::smt::RocksDbConfig; - /// - /// let config = RocksDbConfig::new("/path/to/database") - /// .with_cache_size(2 * 1024 * 1024 * 1024); // 2GB cache - /// ``` - pub fn with_cache_size(mut self, size: usize) -> Self { - self.cache_size = size; - self - } - - /// Sets the maximum number of files that RocksDB can have open simultaneously. - /// - /// This setting affects both memory usage and the number of file descriptors used by the - /// process. Higher values may improve performance for databases with many SST files but - /// increase resource usage. - /// - /// # Arguments - /// * `count` - The maximum number of open files. Must be positive. - /// - /// # Examples - /// ``` - /// use miden_crypto::merkle::smt::RocksDbConfig; - /// - /// let config = RocksDbConfig::new("/path/to/database") - /// .with_max_open_files(1024); // Allow up to 1024 open files - /// ``` - pub fn with_max_open_files(mut self, count: i32) -> Self { - self.max_open_files = count; - self - } -} - -// SUBTREE DB KEY -// -------------------------------------------------------------------------------------------- - -/// Compact key wrapper for variable-length subtree prefixes. -/// -/// * `bytes` always holds the big-endian 8-byte value. -/// * `len` is how many leading bytes are significant (3-7). 
-#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)] -pub(crate) struct KeyBytes { - bytes: [u8; 8], - len: u8, -} - -impl KeyBytes { - #[inline(always)] - pub fn new(value: u64, keep: usize) -> Self { - debug_assert!((3..=7).contains(&keep)); - let bytes = value.to_be_bytes(); - debug_assert!(bytes[..8 - keep].iter().all(|&b| b == 0)); - Self { bytes, len: keep as u8 } - } - - #[inline(always)] - pub fn as_slice(&self) -> &[u8] { - &self.bytes[8 - self.len as usize..] - } -} - -impl AsRef<[u8]> for KeyBytes { - #[inline(always)] - fn as_ref(&self) -> &[u8] { - self.as_slice() - } -} - -// HELPERS -// -------------------------------------------------------------------------------------------- - -/// Deserializes an index (u64) from a RocksDB key byte slice. -/// Expects `key_bytes` to be exactly 8 bytes long. -/// -/// # Errors -/// - `StorageError::BadKeyLen`: If `key_bytes` is not 8 bytes long or conversion fails. -fn index_from_key_bytes(key_bytes: &[u8]) -> Result { - if key_bytes.len() != 8 { - return Err(StorageError::BadKeyLen { expected: 8, found: key_bytes.len() }); - } - let mut arr = [0u8; 8]; - arr.copy_from_slice(key_bytes); - Ok(u64::from_be_bytes(arr)) -} - -/// Reconstructs a `NodeIndex` from the variable-length subtree key stored in RocksDB. -/// -/// * `key_bytes` is the big-endian tail of the 64-bit value: -/// - depth 56 → 7 bytes -/// - depth 48 → 6 bytes -/// - depth 40 → 5 bytes -/// - depth 32 → 4 bytes -/// - depth 24 → 3 bytes -/// -/// # Errors -/// * `StorageError::Unsupported` - `depth` is not one of 24/32/40/48/56. -/// * `StorageError::DeserializationError` - `key_bytes.len()` does not match the length required by -/// `depth`. 
-#[inline(always)] -fn subtree_root_from_key_bytes(key_bytes: &[u8], depth: u8) -> Result { - let expected = match depth { - 24 => 3, - 32 => 4, - 40 => 5, - 48 => 6, - 56 => 7, - d => return Err(StorageError::Unsupported(format!("unsupported subtree depth {d}"))), - }; - - if key_bytes.len() != expected { - return Err(StorageError::BadSubtreeKeyLen { depth, expected, found: key_bytes.len() }); - } - let mut buf = [0u8; 8]; - buf[8 - expected..].copy_from_slice(key_bytes); - let value = u64::from_be_bytes(buf); - Ok(NodeIndex::new_unchecked(depth, value)) -} - -/// Helper that maps an SMT depth to its column family. -#[inline(always)] -fn cf_for_depth(depth: u8) -> &'static str { - match depth { - 24 => SUBTREE_24_CF, - 32 => SUBTREE_32_CF, - 40 => SUBTREE_40_CF, - 48 => SUBTREE_48_CF, - 56 => SUBTREE_56_CF, - _ => panic!("unsupported subtree depth: {depth}"), - } -} - -impl From for StorageError { - fn from(e: rocksdb::Error) -> Self { - StorageError::Backend(Box::new(e)) - } -} diff --git a/miden-crypto/src/merkle/smt/large/subtree/mod.rs b/miden-crypto/src/merkle/smt/large/subtree/mod.rs index bc7348fb3a..b669407f07 100644 --- a/miden-crypto/src/merkle/smt/large/subtree/mod.rs +++ b/miden-crypto/src/merkle/smt/large/subtree/mod.rs @@ -12,7 +12,7 @@ pub use error::SubtreeError; #[cfg(test)] mod tests; -/// Represents a complete 8-depth subtree that is serialized into a single RocksDB entry. +/// Represents a complete 8-depth subtree that is serialized into a single storage entry. /// /// ### What is stored /// - `nodes` tracks only **non-empty inner nodes** of this subtree (i.e., nodes for which at least diff --git a/miden-crypto/src/merkle/store/mod.rs b/miden-crypto/src/merkle/store/mod.rs index 9d0fe575ad..5d0a379df2 100644 --- a/miden-crypto/src/merkle/store/mod.rs +++ b/miden-crypto/src/merkle/store/mod.rs @@ -192,7 +192,7 @@ impl MerkleStore { /// existence verification is needed. 
pub fn has_path(&self, root: Word, index: NodeIndex) -> bool { // check if the root exists - if self.nodes.get(&root).is_none() { + if !self.nodes.contains_key(&root) { return false; } diff --git a/miden-crypto/tests/rocksdb_large_smt.rs b/miden-crypto/tests/rocksdb_large_smt.rs deleted file mode 100644 index e677b918ac..0000000000 --- a/miden-crypto/tests/rocksdb_large_smt.rs +++ /dev/null @@ -1,147 +0,0 @@ -use miden_crypto::{ - EMPTY_WORD, Felt, ONE, WORD_SIZE, Word, - merkle::{ - InnerNodeInfo, - smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, - }, -}; -use tempfile::TempDir; - -fn setup_storage() -> (RocksDbStorage, TempDir) { - let temp_dir = tempfile::Builder::new() - .prefix("test_smt_rocksdb_") - .tempdir() - .expect("Failed to create temporary directory for RocksDB test"); - - let db_path = temp_dir.path().to_path_buf(); - - let storage = RocksDbStorage::open(RocksDbConfig::new(db_path)) - .expect("Failed to open RocksDbStorage in temporary directory"); - (storage, temp_dir) -} - -fn generate_entries(pair_count: usize) -> Vec<(Word, Word)> { - (0..pair_count) - .map(|i| { - let key = Word::new([ONE, ONE, Felt::new(i as u64), Felt::new(i as u64 % 1000)]); - let value = Word::new([ONE, ONE, ONE, Felt::new(i as u64)]); - (key, value) - }) - .collect() -} - -#[test] -fn rocksdb_sanity_insert_and_get() { - let (storage, _tmp) = setup_storage(); - let mut smt = LargeSmt::::new(storage).unwrap(); - - let key = Word::new([ONE, ONE, ONE, ONE]); - let val = Word::new([ONE; WORD_SIZE]); - - let prev = smt.insert(key, val).unwrap(); - assert_eq!(prev, EMPTY_WORD); - assert_eq!(smt.get_value(&key), val); -} - -#[test] -fn rocksdb_persistence_reopen() { - let entries = generate_entries(1000); - - let (initial_storage, temp_dir_guard) = setup_storage(); - let db_path = temp_dir_guard.path().to_path_buf(); - - let smt = LargeSmt::::with_entries(initial_storage, entries).unwrap(); - let root = smt.root(); - - let mut inner_nodes: Vec = 
smt.inner_nodes().unwrap().collect(); - inner_nodes.sort_by_key(|info| info.value); - drop(smt); - - let reopened_storage = RocksDbStorage::open(RocksDbConfig::new(db_path)).unwrap(); - let smt = LargeSmt::::new(reopened_storage).unwrap(); - - let mut inner_nodes_2: Vec = smt.inner_nodes().unwrap().collect(); - inner_nodes_2.sort_by_key(|info| info.value); - - assert_eq!(inner_nodes.len(), inner_nodes_2.len()); - assert_eq!(inner_nodes, inner_nodes_2); - assert_eq!(smt.root(), root); -} - -#[test] -fn rocksdb_persistence_after_insertion() { - let entries = generate_entries(1000); - - let (initial_storage, temp_dir_guard) = setup_storage(); - let db_path = temp_dir_guard.path().to_path_buf(); - - let mut smt = LargeSmt::::with_entries(initial_storage, entries).unwrap(); - let key = Word::new([ONE, ONE, ONE, ONE]); - let new_value = Word::new([Felt::new(2), Felt::new(2), Felt::new(2), Felt::new(2)]); - smt.insert(key, new_value).unwrap(); - let root = smt.root(); - - let mut inner_nodes: Vec = smt.inner_nodes().unwrap().collect(); - inner_nodes.sort_by_key(|info| info.value); - drop(smt); - - let reopened_storage = RocksDbStorage::open(RocksDbConfig::new(db_path)).unwrap(); - let smt = LargeSmt::::new(reopened_storage).unwrap(); - - let mut inner_nodes_2: Vec = smt.inner_nodes().unwrap().collect(); - inner_nodes_2.sort_by_key(|info| info.value); - - assert_eq!(inner_nodes.len(), inner_nodes_2.len()); - assert_eq!(inner_nodes, inner_nodes_2); - assert_eq!(smt.root(), root); -} - -#[test] -fn rocksdb_persistence_after_insert_batch_with_deletions() { - // Create a tree with initial entries - let entries = generate_entries(10_000); - - let (initial_storage, temp_dir_guard) = setup_storage(); - let db_path = temp_dir_guard.path().to_path_buf(); - - let mut smt = LargeSmt::::with_entries(initial_storage, entries).unwrap(); - - // Create a batch that includes both insertions and deletions - let mut batch_entries: Vec<(Word, Word)> = Vec::new(); - - // Add new entries - for 
i in 20_000..25_000 { - let key = Word::new([ONE, ONE, Felt::new(i as u64), Felt::new(i as u64 % 1000)]); - let value = Word::new([ONE, ONE, ONE, Felt::new(i as u64)]); - batch_entries.push((key, value)); - } - - // Delete some existing entries - for i in 0..1000 { - let key = Word::new([ONE, ONE, Felt::new(i as u64), Felt::new(i as u64 % 1000)]); - batch_entries.push((key, EMPTY_WORD)); - } - - smt.insert_batch(batch_entries).unwrap(); - let root = smt.root(); - - let mut inner_nodes: Vec = smt.inner_nodes().unwrap().collect(); - inner_nodes.sort_by_key(|info| info.value); - let num_leaves = smt.num_leaves().unwrap(); - let num_entries = smt.num_entries().unwrap(); - drop(smt); - - let reopened_storage = RocksDbStorage::open(RocksDbConfig::new(db_path)).unwrap(); - let smt = LargeSmt::::new(reopened_storage).unwrap(); - - let mut inner_nodes_2: Vec = smt.inner_nodes().unwrap().collect(); - inner_nodes_2.sort_by_key(|info| info.value); - let num_leaves_2 = smt.num_leaves().unwrap(); - let num_entries_2 = smt.num_entries().unwrap(); - - assert_eq!(inner_nodes.len(), inner_nodes_2.len()); - assert_eq!(inner_nodes, inner_nodes_2); - assert_eq!(num_leaves, num_leaves_2); - assert_eq!(num_entries, num_entries_2); - assert_eq!(smt.root(), root, "Tree reconstruction failed - root mismatch after deletions"); -} From 7deaa57b12a810eff054af3735e82cadd59ba3dc Mon Sep 17 00:00:00 2001 From: Bernhard Schuster Date: Wed, 25 Feb 2026 10:43:05 +0100 Subject: [PATCH 3/4] changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c3d32c0f5d..eedaa296c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.19.7 (TBD) + +- Expose `StorageError` and `SubtreeUpdate` as prep. to externalize the `LargeSmt` RocksDB backend ([#850](https://github.com/0xMiden/crypto/pull/850)). + ## 0.19.6 (2026-02-24) - Added `Signature::from_der()` for ECDSA signatures over secp256k1 ([#842](https://github.com/0xMiden/crypto/pull/842)). 
From 9224fe8ef0f9249d47f7904564d8f5ce6991b337 Mon Sep 17 00:00:00 2001 From: Bernhard Schuster Date: Wed, 25 Feb 2026 11:32:47 +0100 Subject: [PATCH 4/4] Revert "drop rocksdb large smt backend" This reverts commit e0728398faf019db4ab96a67ec06d84dde2966ee. --- .config/nextest.toml | 4 +- .github/workflows/lint.yml | 10 + Cargo.lock | 138 ++ Makefile | 14 +- miden-crypto/Cargo.toml | 11 + miden-crypto/benches/large-smt.rs | 155 ++ miden-crypto/src/main.rs | 85 +- .../src/merkle/smt/large/batch_ops.rs | 4 +- miden-crypto/src/merkle/smt/large/mod.rs | 111 +- .../src/merkle/smt/large/storage/mod.rs | 5 + .../src/merkle/smt/large/storage/rocksdb.rs | 1314 +++++++++++++++++ .../src/merkle/smt/large/subtree/mod.rs | 2 +- miden-crypto/src/merkle/store/mod.rs | 2 +- miden-crypto/tests/rocksdb_large_smt.rs | 147 ++ 14 files changed, 1987 insertions(+), 15 deletions(-) create mode 100644 miden-crypto/benches/large-smt.rs create mode 100644 miden-crypto/src/merkle/smt/large/storage/rocksdb.rs create mode 100644 miden-crypto/tests/rocksdb_large_smt.rs diff --git a/.config/nextest.toml b/.config/nextest.toml index e7ca0a8686..9268e014ec 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -1,5 +1,5 @@ [profile.default] -default-filter = 'not (test(merkle::smt::full::concurrent) or test(merkle::smt::full::large))' +default-filter = 'not (test(merkle::smt::full::concurrent) or test(merkle::smt::full::large) or binary(rocksdb_large_smt))' fail-fast = false failure-output = "immediate-final" @@ -9,6 +9,6 @@ fail-fast = false failure-output = "immediate-final" [profile.large-smt] -default-filter = 'test(merkle::smt::full::large)' +default-filter = '(test(merkle::smt::full::large) or binary(rocksdb_large_smt))' fail-fast = false failure-output = "immediate-final" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 032c7f5075..6fb6141955 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -42,6 +42,11 @@ jobs: runs-on: 
ubuntu-latest steps: - uses: actions/checkout@v4 + # Added: LLVM/Clang for RocksDB/bindgen + - name: Install LLVM/Clang + uses: KyleMayes/install-llvm-action@v2 + with: + version: "17" - name: Rustup run: | rustup update --no-self-update @@ -78,6 +83,11 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + # Added: LLVM/Clang for RocksDB/bindgen + - name: Install LLVM/Clang + uses: ./.github/actions/install-llvm + with: + version: "17" - name: Rustup run: rustup update --no-self-update - uses: Swatinem/rust-cache@v2 diff --git a/Cargo.lock b/Cargo.lock index 7c26e7be25..af41e56842 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,6 +119,24 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + [[package]] name = "bitflags" version = "2.10.0" @@ -153,6 +171,16 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cast" version = "0.3.0" @@ -171,6 +199,15 @@ dependencies = [ "shlex", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name 
= "cfg-if" version = "1.0.4" @@ -239,6 +276,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.5.53" @@ -823,12 +871,47 @@ version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libm" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +[[package]] +name = "librocksdb-sys" +version = "0.17.3+10.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cef2a00ee60fe526157c9023edab23943fae1ce2ab6f4abb2a807c1746835de9" +dependencies = [ + "bindgen", + "bzip2-sys", + "cc", + "libc", + "libz-sys", + "lz4-sys", +] + +[[package]] +name = "libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -844,6 +927,16 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "memchr" version = "2.7.6" @@ -878,6 +971,7 @@ 
dependencies = [ "rand_core 0.9.3", "rand_hc", "rayon", + "rocksdb", "rstest", "seq-macro", "serde", @@ -901,6 +995,12 @@ dependencies = [ "syn", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "nanorand" version = "0.7.0" @@ -910,6 +1010,16 @@ dependencies = [ "getrandom 0.2.16", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num" version = "0.4.3" @@ -1030,6 +1140,12 @@ dependencies = [ "spki", ] +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "plotters" version = "0.3.7" @@ -1246,6 +1362,16 @@ dependencies = [ "subtle", ] +[[package]] +name = "rocksdb" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddb7af00d2b17dbd07d82c0063e25411959748ff03e8d4f96134c2ff41fce34f" +dependencies = [ + "libc", + "librocksdb-sys", +] + [[package]] name = "rstest" version = "0.26.1" @@ -1275,6 +1401,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -1585,6 +1717,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" diff --git a/Makefile b/Makefile index 37f561be08..55d2835dcc 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ help: # -- variables -------------------------------------------------------------------------------------- -ALL_FEATURES="concurrent executable hashmaps internal serde std" +ALL_FEATURES_EXCEPT_ROCKSDB="concurrent executable hashmaps internal serde std" DEBUG_OVERFLOW_INFO=RUSTFLAGS="-C debug-assertions -C overflow-checks -C debuginfo=2" WARNINGS=RUSTDOCFLAGS="-D warnings" @@ -64,7 +64,7 @@ doc: ## Generate and check documentation .PHONY: test-default test-default: ## Run tests with default features - $(DEBUG_OVERFLOW_INFO) cargo nextest run --profile default --release --features ${ALL_FEATURES} + $(DEBUG_OVERFLOW_INFO) cargo nextest run --profile default --release --features ${ALL_FEATURES_EXCEPT_ROCKSDB} .PHONY: test-hashmaps test-hashmaps: ## Run tests with `hashmaps` feature enabled @@ -84,7 +84,7 @@ test-docs: .PHONY: test-large-smt test-large-smt: ## Run only large SMT tests - $(DEBUG_OVERFLOW_INFO) cargo nextest run --success-output immediate --profile large-smt --release --features hashmaps + $(DEBUG_OVERFLOW_INFO) cargo nextest run --success-output immediate --profile large-smt --release --features hashmaps,rocksdb .PHONY: test test: test-default test-hashmaps test-no-std test-docs test-large-smt ## Run all tests except concurrent SMT tests @@ -135,6 +135,14 @@ bench-smt-concurrent: ## Run SMT benchmarks with concurrent feature bench-large-smt-memory: ## Run large SMT benchmarks with memory storage cargo run --release --features concurrent,hashmaps,executable -- --size 1000000 +.PHONY: bench-large-smt-rocksdb +bench-large-smt-rocksdb: ## Run large SMT benchmarks with rocksdb storage + cargo run --release --features concurrent,hashmaps,rocksdb,executable -- 
--storage rocksdb --size 1000000 + +.PHONY: bench-large-smt-rocksdb-open +bench-large-smt-rocksdb-open: ## Run large SMT benchmarks with rocksdb storage and open existing database + cargo run --release --features concurrent,hashmaps,rocksdb,executable -- --storage rocksdb --open + # --- fuzzing -------------------------------------------------------------------------------- .PHONY: fuzz-smt diff --git a/miden-crypto/Cargo.toml b/miden-crypto/Cargo.toml index 9608498e19..5f731e80c0 100644 --- a/miden-crypto/Cargo.toml +++ b/miden-crypto/Cargo.toml @@ -36,6 +36,11 @@ required-features = ["internal"] harness = false name = "merkle" +[[bench]] +harness = false +name = "large-smt" +required-features = ["rocksdb"] + [[bench]] harness = false name = "store" @@ -59,6 +64,7 @@ executable = ["dep:clap", "dep:rand-utils", "std"] fuzzing = [] hashmaps = ["dep:hashbrown"] internal = [] +rocksdb = ["concurrent", "dep:rocksdb"] serde = ["dep:serde", "serde?/alloc", "winter-math/serde"] std = [ "blake3/std", @@ -89,6 +95,7 @@ rand_chacha = { default-features = false, version = "0.9" } rand_core = { default-features = false, version = "0.9" } rand_hc = { version = "0.3" } rayon = { optional = true, version = "1.10" } +rocksdb = { default-features = false, features = ["bindgen-runtime", "lz4"], optional = true, version = "0.24" } serde = { default-features = false, features = ["derive"], optional = true, version = "1.0" } sha2 = { default-features = false, version = "0.10" } sha3 = { default-features = false, version = "0.10" } @@ -120,3 +127,7 @@ workspace = true [package.metadata.cargo-machete] ignored = ["getrandom"] + +[[test]] +name = "rocksdb_large_smt" +required-features = ["concurrent", "rocksdb"] diff --git a/miden-crypto/benches/large-smt.rs b/miden-crypto/benches/large-smt.rs new file mode 100644 index 0000000000..6d1daf0eaf --- /dev/null +++ b/miden-crypto/benches/large-smt.rs @@ -0,0 +1,155 @@ +use std::hint; + +use criterion::{Criterion, criterion_group, 
criterion_main}; +use miden_crypto::{ + Word, + merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, +}; + +mod common; +use common::*; + +use crate::{ + common::data::{generate_smt_entries_sequential, generate_test_keys_sequential}, + config::{DEFAULT_MEASUREMENT_TIME, DEFAULT_SAMPLE_SIZE}, +}; + +benchmark_with_setup_data! { + large_smt_open, + DEFAULT_MEASUREMENT_TIME, + DEFAULT_SAMPLE_SIZE, + "open", + || { + let entries = generate_smt_entries_sequential(256); + let keys = generate_test_keys_sequential(10); + let temp_dir = tempfile::TempDir::new().unwrap(); + let storage = RocksDbStorage::open(RocksDbConfig::new(temp_dir.path())).unwrap(); + let smt = LargeSmt::with_entries(storage, entries).unwrap(); + (smt, keys, temp_dir) + }, + |b: &mut criterion::Bencher, (smt, keys, _temp_dir): &(LargeSmt, Vec, tempfile::TempDir)| { + b.iter(|| { + for key in keys { + hint::black_box(smt.open(key)); + } + }) + }, +} + +benchmark_with_setup_data! { + large_smt_compute_mutations, + DEFAULT_MEASUREMENT_TIME, + DEFAULT_SAMPLE_SIZE, + "compute_mutations", + || { + let entries = generate_smt_entries_sequential(256); + let temp_dir = tempfile::TempDir::new().unwrap(); + let storage = RocksDbStorage::open(RocksDbConfig::new(temp_dir.path())).unwrap(); + let smt = LargeSmt::with_entries(storage, entries).unwrap(); + let new_entries = generate_smt_entries_sequential(10_000); + (smt, new_entries, temp_dir) + }, + |b: &mut criterion::Bencher, (smt, new_entries, _temp_dir): &(LargeSmt, Vec<(Word, Word)>, tempfile::TempDir)| { + b.iter(|| { + hint::black_box(smt.compute_mutations(new_entries.clone()).unwrap()); + }) + }, +} + +// Benchmarks apply_mutations at different batch sizes. +// Setup: Creates fresh tree and computes mutations +// Measured: Only the apply_mutations call +// Tests: Performance scaling with mutation size (100, 1k, 10k entries) on a tree with 256 entries +benchmark_batch! 
{ + large_smt_apply_mutations, + &[100, 1_000, 10_000], + |b: &mut criterion::Bencher, entry_count: usize| { + use criterion::BatchSize; + + let base_entries = generate_smt_entries_sequential(256); + let bench_dir = std::env::temp_dir().join("bench_apply_mutations"); + + b.iter_batched( + || { + std::fs::create_dir_all(&bench_dir).unwrap(); + let storage = RocksDbStorage::open(RocksDbConfig::new(&bench_dir)).unwrap(); + let smt = LargeSmt::with_entries(storage, base_entries.clone()).unwrap(); + let new_entries = generate_smt_entries_sequential(entry_count); + let mutations = smt.compute_mutations(new_entries).unwrap(); + + (smt, mutations, bench_dir.clone()) + }, + |(mut smt, mutations, bench_dir)| { + smt.apply_mutations(mutations).unwrap(); + drop(smt); + let _ = std::fs::remove_dir_all(&bench_dir); + }, + BatchSize::LargeInput, + ) + }, + |size| Some(criterion::Throughput::Elements(size as u64)) +} + +// Benchmarks apply_mutations_with_reversion at different batch sizes. +// Setup: Creates fresh tree and computes mutations +// Measured: Only the apply_mutations_with_reversion call +// Tests: Performance scaling with mutation size (100, 1k, 10k entries) on a tree with 256 entries +benchmark_batch! 
{ + large_smt_apply_mutations_with_reversion, + &[100, 1_000, 10_000], + |b: &mut criterion::Bencher, entry_count: usize| { + use criterion::BatchSize; + + let base_entries = generate_smt_entries_sequential(256); + let bench_dir = std::env::temp_dir().join("bench_apply_mutations_with_reversion"); + + b.iter_batched( + || { + std::fs::create_dir_all(&bench_dir).unwrap(); + let storage = RocksDbStorage::open(RocksDbConfig::new(&bench_dir)).unwrap(); + let smt = LargeSmt::with_entries(storage, base_entries.clone()).unwrap(); + let new_entries = generate_smt_entries_sequential(entry_count); + let mutations = smt.compute_mutations(new_entries).unwrap(); + + (smt, mutations, bench_dir.clone()) + }, + |(mut smt, mutations, bench_dir)| { + let _ = smt.apply_mutations_with_reversion(mutations).unwrap(); + drop(smt); + let _ = std::fs::remove_dir_all(&bench_dir); + }, + BatchSize::LargeInput, + ) + }, + |size| Some(criterion::Throughput::Elements(size as u64)) +} + +benchmark_batch! { + large_smt_insert_batch, + &[1, 16, 32, 64, 128], + |b: &mut criterion::Bencher, insert_count: usize| { + let base_entries = generate_smt_entries_sequential(256); + let temp_dir = tempfile::TempDir::new().unwrap(); + let storage = RocksDbStorage::open(RocksDbConfig::new(temp_dir.path())).unwrap(); + let mut smt = LargeSmt::with_entries(storage, base_entries).unwrap(); + + b.iter(|| { + for _ in 0..insert_count { + let new_entries = generate_smt_entries_sequential(10_000); + smt.insert_batch(new_entries).unwrap(); + } + }) + }, + |size| Some(criterion::Throughput::Elements(size as u64)) +} + +criterion_group!( + large_smt_benchmark_group, + large_smt_open, + large_smt_compute_mutations, + large_smt_apply_mutations, + large_smt_apply_mutations_with_reversion, + large_smt_insert_batch, +); + +criterion_main!(large_smt_benchmark_group); diff --git a/miden-crypto/src/main.rs b/miden-crypto/src/main.rs index 7496033896..a52d388f15 100644 --- a/miden-crypto/src/main.rs +++ b/miden-crypto/src/main.rs 
@@ -1,6 +1,8 @@ -use std::time::Instant; +use std::{path::PathBuf, time::Instant}; -use clap::Parser; +use clap::{Parser, ValueEnum}; +#[cfg(feature = "rocksdb")] +use miden_crypto::merkle::smt::{RocksDbConfig, RocksDbStorage}; use miden_crypto::{ EMPTY_WORD, Felt, ONE, Word, hash::rpo::Rpo256, @@ -23,9 +25,24 @@ pub struct BenchmarkCmd { /// Number of updates #[arg(short = 'u', long = "updates", default_value = "10000")] updates: usize, + /// Path for the benchmark database + #[clap(short = 'p', long = "path")] + storage_path: Option, + /// Open existing database and skip construction + #[clap(short = 'o', long = "open", default_value = "false")] + open: bool, /// Number of batch operations #[clap(short = 'b', long = "batches", default_value = "1")] batches: usize, + /// Storage backend to use at runtime: memory or rocksdb + #[arg(short = 's', long = "storage", value_enum, default_value = "memory")] + storage: StorageKind, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] +pub enum StorageKind { + Memory, + Rocksdb, } fn main() { @@ -39,9 +56,16 @@ pub fn benchmark_smt() { let tree_size = args.size; let insertions = args.insertions; let updates = args.updates; + let storage_path = args.storage_path; let batches = args.batches; - println!("Running benchmark with memory storage"); + println!( + "Running benchmark with {} storage", + match args.storage { + StorageKind::Memory => "memory", + StorageKind::Rocksdb => "rocksdb", + } + ); assert!(updates <= tree_size, "Cannot update more than `size`"); // prepare the `leaves` vector for tree creation let mut entries = Vec::new(); @@ -51,7 +75,11 @@ pub fn benchmark_smt() { entries.push((key, value)); } - let mut tree = construction(entries.clone(), tree_size).unwrap(); + let mut tree = if args.open { + open_existing(storage_path, args.storage).unwrap() + } else { + construction(entries.clone(), tree_size, storage_path, args.storage).unwrap() + }; insertion(&mut tree, insertions).unwrap(); for _ in 0..batches { 
batched_insertion(&mut tree, insertions).unwrap(); @@ -64,10 +92,12 @@ pub fn benchmark_smt() { pub fn construction( entries: Vec<(Word, Word)>, size: usize, + database_path: Option, + storage: StorageKind, ) -> Result, LargeSmtError> { println!("Running a construction benchmark:"); let now = Instant::now(); - let storage: Storage = Box::new(MemoryStorage::new()); + let storage = get_storage(database_path, false, storage); let tree = LargeSmt::with_entries(storage, entries)?; let elapsed = now.elapsed().as_secs_f32(); println!("Constructed an SMT with {size} key-value pairs in {elapsed:.1} seconds"); @@ -76,6 +106,18 @@ pub fn construction( Ok(tree) } +pub fn open_existing( + storage_path: Option, + storage: StorageKind, +) -> Result, LargeSmtError> { + println!("Opening an existing database:"); + let now = Instant::now(); + let storage = get_storage(storage_path, true, storage); + let tree = LargeSmt::new(storage)?; + let elapsed = now.elapsed().as_secs_f32(); + println!("Opened an existing database in {elapsed:.1} seconds"); + Ok(tree) +} /// Runs the insertion benchmark for the [`Smt`]. 
pub fn insertion(tree: &mut LargeSmt, insertions: usize) -> Result<(), LargeSmtError> { println!("Running an insertion benchmark:"); @@ -233,3 +275,36 @@ pub fn proof_generation(tree: &mut LargeSmt) -> Result<(), LargeSmtErro Ok(()) } + +#[allow(unused_variables)] +fn get_storage(database_path: Option, open: bool, kind: StorageKind) -> Storage { + match kind { + StorageKind::Memory => Box::new(MemoryStorage::new()), + StorageKind::Rocksdb => { + #[cfg(feature = "rocksdb")] + { + let path = database_path + .unwrap_or_else(|| std::env::temp_dir().join("miden_crypto_benchmark")); + println!("Using database path: {}", path.display()); + if !open { + // delete the folder if it exists as we are creating a new database + if path.exists() { + std::fs::remove_dir_all(path.clone()).unwrap(); + } + std::fs::create_dir_all(path.clone()) + .expect("Failed to create database directory"); + } + let db = RocksDbStorage::open( + RocksDbConfig::new(path).with_cache_size(1 << 30).with_max_open_files(2048), + ) + .expect("Failed to open database"); + Box::new(db) + } + #[cfg(not(feature = "rocksdb"))] + { + eprintln!("rocksdb feature not enabled; falling back to memory storage"); + Box::new(MemoryStorage::new()) + } + }, + } +} diff --git a/miden-crypto/src/merkle/smt/large/batch_ops.rs b/miden-crypto/src/merkle/smt/large/batch_ops.rs index b6304844ee..5428be3101 100644 --- a/miden-crypto/src/merkle/smt/large/batch_ops.rs +++ b/miden-crypto/src/merkle/smt/large/batch_ops.rs @@ -308,11 +308,11 @@ impl LargeSmt { /// ```no_run /// use miden_crypto::{ /// EMPTY_WORD, Felt, Word, - /// merkle::smt::{LargeSmt, MemoryStorage}, + /// merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, /// }; /// /// # fn main() -> Result<(), Box> { - /// let storage = MemoryStorage::default(); + /// let storage = RocksDbStorage::open(RocksDbConfig::new("/path/to/db"))?; /// let mut smt = LargeSmt::new(storage)?; /// /// let entries = vec![ diff --git a/miden-crypto/src/merkle/smt/large/mod.rs 
b/miden-crypto/src/merkle/smt/large/mod.rs index 306fc6af43..c3420b39a2 100644 --- a/miden-crypto/src/merkle/smt/large/mod.rs +++ b/miden-crypto/src/merkle/smt/large/mod.rs @@ -2,7 +2,114 @@ //! //! `LargeSmt` stores the top of the tree (depths 0–23) in memory and persists the lower //! depths (24–64) in storage as fixed-size subtrees. This hybrid layout scales beyond RAM -//! while keeping common operations fast. +//! while keeping common operations fast. With the `rocksdb` feature enabled, the lower +//! subtrees and leaves are stored in RocksDB. On reopen, the in-memory top is reconstructed +//! from cached depth-24 subtree roots. +//! +//! Examples below require the `rocksdb` feature. +//! +//! Open an existing RocksDB-backed tree: +//! ```no_run +//! use miden_crypto::merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}; +//! +//! # fn main() -> Result<(), Box> { +//! let storage = RocksDbStorage::open(RocksDbConfig::new("/path/to/db"))?; +//! let smt = LargeSmt::new(storage)?; // reconstructs in-memory top if data exists +//! let _root = smt.root(); +//! # Ok(()) +//! # } +//! ``` +//! +//! Initialize an empty RocksDB-backed tree and bulk-load entries: +//! ```no_run +//! use miden_crypto::{ +//! Felt, Word, +//! merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, +//! }; +//! +//! # fn main() -> Result<(), Box> { +//! let path = "/path/to/new-db"; +//! if std::path::Path::new(path).exists() { +//! std::fs::remove_dir_all(path)?; +//! } +//! std::fs::create_dir_all(path)?; +//! +//! let storage = RocksDbStorage::open(RocksDbConfig::new(path))?; +//! let mut smt = LargeSmt::new(storage)?; // empty tree +//! +//! // Prepare initial entries +//! let entries = vec![ +//! ( +//! Word::new([Felt::new(1), Felt::new(0), Felt::new(0), Felt::new(0)]), +//! Word::new([Felt::new(10), Felt::new(20), Felt::new(30), Felt::new(40)]), +//! ), +//! ( +//! Word::new([Felt::new(2), Felt::new(0), Felt::new(0), Felt::new(0)]), +//! 
Word::new([Felt::new(11), Felt::new(22), Felt::new(33), Felt::new(44)]), +//! ), +//! ]; +//! +//! // Bulk insert entries (faster than compute_mutations + apply_mutations) +//! smt.insert_batch(entries)?; +//! # Ok(()) +//! # } +//! ``` +//! +//! Apply batch updates (insertions and deletions): +//! ```no_run +//! use miden_crypto::{ +//! EMPTY_WORD, Felt, Word, +//! merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, +//! }; +//! +//! # fn main() -> Result<(), Box> { +//! let storage = RocksDbStorage::open(RocksDbConfig::new("/path/to/db"))?; +//! let mut smt = LargeSmt::new(storage)?; +//! +//! let k1 = Word::new([Felt::new(101), Felt::new(0), Felt::new(0), Felt::new(0)]); +//! let v1 = Word::new([Felt::new(1), Felt::new(2), Felt::new(3), Felt::new(4)]); +//! let k2 = Word::new([Felt::new(202), Felt::new(0), Felt::new(0), Felt::new(0)]); +//! let k3 = Word::new([Felt::new(303), Felt::new(0), Felt::new(0), Felt::new(0)]); +//! let v3 = Word::new([Felt::new(7), Felt::new(7), Felt::new(7), Felt::new(7)]); +//! +//! // EMPTY_WORD marks deletions +//! let updates = vec![(k1, v1), (k2, EMPTY_WORD), (k3, v3)]; +//! smt.insert_batch(updates)?; +//! # Ok(()) +//! # } +//! ``` +//! +//! Quick initialization with `with_entries` (best for modest datasets/tests): +//! ```no_run +//! use miden_crypto::{ +//! Felt, Word, +//! merkle::smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, +//! }; +//! +//! # fn main() -> Result<(), Box> { +//! // Note: `with_entries` expects an EMPTY storage and performs an all-at-once build. +//! // Prefer `insert_batch` for large bulk loads. +//! let path = "/path/to/new-db"; +//! if std::path::Path::new(path).exists() { +//! std::fs::remove_dir_all(path)?; +//! } +//! std::fs::create_dir_all(path)?; +//! +//! let storage = RocksDbStorage::open(RocksDbConfig::new(path))?; +//! let entries = vec![ +//! ( +//! Word::new([Felt::new(1), Felt::new(0), Felt::new(0), Felt::new(0)]), +//! 
Word::new([Felt::new(10), Felt::new(20), Felt::new(30), Felt::new(40)]), +//! ), +//! ( +//! Word::new([Felt::new(2), Felt::new(0), Felt::new(0), Felt::new(0)]), +//! Word::new([Felt::new(11), Felt::new(22), Felt::new(33), Felt::new(44)]), +//! ), +//! ]; +//! let _smt = LargeSmt::with_entries(storage, entries)?; +//! # Ok(()) +//! # } +//! ``` //! //! ## Performance and Memory Considerations //! @@ -57,6 +164,8 @@ mod storage; pub use storage::{ MemoryStorage, SmtStorage, StorageError, StorageUpdateParts, StorageUpdates, SubtreeUpdate, }; +#[cfg(feature = "rocksdb")] +pub use storage::{RocksDbConfig, RocksDbStorage}; mod iter; pub use iter::LargeSmtInnerNodeIterator; diff --git a/miden-crypto/src/merkle/smt/large/storage/mod.rs b/miden-crypto/src/merkle/smt/large/storage/mod.rs index 94326ea9cb..a15530c65d 100644 --- a/miden-crypto/src/merkle/smt/large/storage/mod.rs +++ b/miden-crypto/src/merkle/smt/large/storage/mod.rs @@ -12,6 +12,11 @@ use crate::{ mod error; pub use error::StorageError; +#[cfg(feature = "rocksdb")] +mod rocksdb; +#[cfg(feature = "rocksdb")] +pub use rocksdb::{RocksDbConfig, RocksDbStorage}; + mod memory; pub use memory::MemoryStorage; diff --git a/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs b/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs new file mode 100644 index 0000000000..f5271013de --- /dev/null +++ b/miden-crypto/src/merkle/smt/large/storage/rocksdb.rs @@ -0,0 +1,1314 @@ +use alloc::{boxed::Box, vec::Vec}; +use std::{path::PathBuf, sync::Arc}; + +use rocksdb::{ + BlockBasedOptions, Cache, ColumnFamilyDescriptor, DB, DBCompactionStyle, DBCompressionType, + DBIteratorWithThreadMode, FlushOptions, IteratorMode, Options, ReadOptions, WriteBatch, +}; +use winter_utils::{Deserializable, Serializable}; + +use super::{SmtStorage, StorageError, StorageUpdateParts, StorageUpdates, SubtreeUpdate}; +use crate::{ + EMPTY_WORD, Word, + merkle::{ + NodeIndex, + smt::{ + InnerNode, Map, SmtLeaf, + large::{IN_MEMORY_DEPTH, LargeSmt, 
subtree::Subtree}, + }, + }, +}; + +/// The name of the RocksDB column family used for storing SMT leaves. +const LEAVES_CF: &str = "leaves"; +/// The names of the RocksDB column families used for storing SMT subtrees (deep nodes). +const SUBTREE_24_CF: &str = "st24"; +const SUBTREE_32_CF: &str = "st32"; +const SUBTREE_40_CF: &str = "st40"; +const SUBTREE_48_CF: &str = "st48"; +const SUBTREE_56_CF: &str = "st56"; + +/// The name of the RocksDB column family used for storing metadata (e.g., root, counts). +const METADATA_CF: &str = "metadata"; +/// The name of the RocksDB column family used for storing level 24 hashes for fast tree rebuilding. +const DEPTH_24_CF: &str = "depth24"; + +/// The key used in the `METADATA_CF` column family to store the SMT's root hash. +const ROOT_KEY: &[u8] = b"smt_root"; +/// The key used in the `METADATA_CF` column family to store the total count of non-empty leaves. +const LEAF_COUNT_KEY: &[u8] = b"leaf_count"; +/// The key used in the `METADATA_CF` column family to store the total count of key-value entries. +const ENTRY_COUNT_KEY: &[u8] = b"entry_count"; + +/// A RocksDB-backed persistent storage implementation for a Sparse Merkle Tree (SMT). +/// +/// Implements the `SmtStorage` trait, providing durable storage for SMT components +/// including leaves, subtrees (for deeper parts of the tree), and metadata like the SMT root +/// and counts. It leverages RocksDB column families to organize data: +/// - `LEAVES_CF` ("leaves"): Stores `SmtLeaf` data, keyed by their logical u64 index. +/// - `SUBTREE_24_CF` ("st24"): Stores serialized `Subtree` data at depth 24, keyed by their root +/// `NodeIndex`. +/// - `SUBTREE_32_CF` ("st32"): Stores serialized `Subtree` data at depth 32, keyed by their root +/// `NodeIndex`. +/// - `SUBTREE_40_CF` ("st40"): Stores serialized `Subtree` data at depth 40, keyed by their root +/// `NodeIndex`. 
+/// - `SUBTREE_48_CF` ("st48"): Stores serialized `Subtree` data at depth 48, keyed by their root +/// `NodeIndex`. +/// - `SUBTREE_56_CF` ("st56"): Stores serialized `Subtree` data at depth 56, keyed by their root +/// `NodeIndex`. +/// - `METADATA_CF` ("metadata"): Stores the root hash and the total leaf and entry counts. +/// - `DEPTH_24_CF` ("depth24"): Stores depth-24 subtree root hashes for fast tree rebuilding. +#[derive(Debug, Clone)] +pub struct RocksDbStorage { + db: Arc<DB>, +} + +impl RocksDbStorage { + /// Opens or creates a RocksDB database at the specified `path` and configures it for SMT + /// storage. + /// + /// This method sets up the necessary column families (`leaves`, one per subtree depth, + /// `metadata`, and `depth24`) and applies various RocksDB options for performance, such as + /// caching, bloom filters, and compaction strategies tailored for SMT workloads. + /// + /// # Errors + /// Returns `StorageError::Backend` if the database cannot be opened or configured, + /// for example, due to path issues, permissions, or RocksDB internal errors. 
+ pub fn open(config: RocksDbConfig) -> Result { + // Base DB options + let mut db_opts = Options::default(); + // Create DB if it doesn't exist + db_opts.create_if_missing(true); + // Auto-create missing column families + db_opts.create_missing_column_families(true); + // Tune compaction threads to match CPU cores + db_opts.increase_parallelism(rayon::current_num_threads() as i32); + // Limit the number of open file handles + db_opts.set_max_open_files(config.max_open_files); + // Parallelize flush/compaction up to CPU count + db_opts.set_max_background_jobs(rayon::current_num_threads() as i32); + // Maximum WAL size + db_opts.set_max_total_wal_size(512 * 1024 * 1024); + + // Shared block cache across all column families + let cache = Cache::new_lru_cache(config.cache_size); + + // Common table options for bloom filtering and cache + let mut table_opts = BlockBasedOptions::default(); + // Use shared LRU cache for block data + table_opts.set_block_cache(&cache); + table_opts.set_bloom_filter(10.0, false); + // Enable whole-key bloom filtering (better with point lookups) + table_opts.set_whole_key_filtering(true); + // Pin L0 filter and index blocks in cache (improves performance) + table_opts.set_pin_l0_filter_and_index_blocks_in_cache(true); + + // Column family for leaves + let mut leaves_opts = Options::default(); + leaves_opts.set_block_based_table_factory(&table_opts); + // 128 MB memtable + leaves_opts.set_write_buffer_size(128 << 20); + // Allow up to 3 memtables + leaves_opts.set_max_write_buffer_number(3); + leaves_opts.set_min_write_buffer_number_to_merge(1); + // Do not retain flushed memtables in memory + leaves_opts.set_max_write_buffer_size_to_maintain(0); + // Use level-based compaction + leaves_opts.set_compaction_style(DBCompactionStyle::Level); + // 512 MB target file size + leaves_opts.set_target_file_size_base(512 << 20); + leaves_opts.set_target_file_size_multiplier(2); + // LZ4 compression + 
leaves_opts.set_compression_type(DBCompressionType::Lz4); + // Set level-based compaction parameters + leaves_opts.set_level_zero_file_num_compaction_trigger(8); + + // Helper to build per-depth subtree CF options; no prefix extractor is configured here + fn subtree_cf(cache: &Cache, bloom_filter_bits: f64) -> Options { + let mut tbl = BlockBasedOptions::default(); + // Use shared LRU cache for block data + tbl.set_block_cache(cache); + // Set bloom filter for subtree lookups + tbl.set_bloom_filter(bloom_filter_bits, false); + // Enable whole-key bloom filtering + tbl.set_whole_key_filtering(true); + // Pin L0 filter and index blocks in cache + tbl.set_pin_l0_filter_and_index_blocks_in_cache(true); + + let mut opts = Options::default(); + opts.set_block_based_table_factory(&tbl); + // 128 MB memtable + opts.set_write_buffer_size(128 << 20); + opts.set_max_write_buffer_number(3); + opts.set_min_write_buffer_number_to_merge(1); + // Do not retain flushed memtables in memory + opts.set_max_write_buffer_size_to_maintain(0); + // Use level-based compaction + opts.set_compaction_style(DBCompactionStyle::Level); + // NOTE(review): presumably overridden by the second call below (8) — confirm intent + opts.set_level_zero_file_num_compaction_trigger(4); + // 512 MB target file size + opts.set_target_file_size_base(512 << 20); + opts.set_target_file_size_multiplier(2); + // LZ4 compression + opts.set_compression_type(DBCompressionType::Lz4); + // Trigger L0 compaction at 8 files (second call on `opts`; the earlier one passed 4) + opts.set_level_zero_file_num_compaction_trigger(8); + opts + } + + let mut depth24_opts = Options::default(); + depth24_opts.set_compression_type(DBCompressionType::Lz4); + depth24_opts.set_block_based_table_factory(&table_opts); + + // Metadata CF with no compression + let mut metadata_opts = Options::default(); + metadata_opts.set_compression_type(DBCompressionType::None); + + // Define column families with tailored options + let cfs = vec![ + ColumnFamilyDescriptor::new(LEAVES_CF, leaves_opts), + ColumnFamilyDescriptor::new(SUBTREE_24_CF, subtree_cf(&cache, 
8.0)), + ColumnFamilyDescriptor::new(SUBTREE_32_CF, subtree_cf(&cache, 10.0)), + ColumnFamilyDescriptor::new(SUBTREE_40_CF, subtree_cf(&cache, 10.0)), + ColumnFamilyDescriptor::new(SUBTREE_48_CF, subtree_cf(&cache, 12.0)), + ColumnFamilyDescriptor::new(SUBTREE_56_CF, subtree_cf(&cache, 12.0)), + ColumnFamilyDescriptor::new(METADATA_CF, metadata_opts), + ColumnFamilyDescriptor::new(DEPTH_24_CF, depth24_opts), + ]; + + // Open the database with our tuned CFs + let db = DB::open_cf_descriptors(&db_opts, config.path, cfs)?; + + Ok(Self { db: Arc::new(db) }) + } + + /// Syncs the RocksDB database to disk. + /// + /// Flushes every column family (waiting for completion) and then calls `flush_wal(true)`. + /// + /// # Errors + /// - Returns `StorageError::Backend` if the flush operation fails. + fn sync(&self) -> Result<(), StorageError> { + let mut fopts = FlushOptions::default(); + fopts.set_wait(true); + + for name in [ + LEAVES_CF, + SUBTREE_24_CF, + SUBTREE_32_CF, + SUBTREE_40_CF, + SUBTREE_48_CF, + SUBTREE_56_CF, + METADATA_CF, + DEPTH_24_CF, + ] { + let cf = self.cf_handle(name)?; + self.db.flush_cf_opt(cf, &fopts)?; + } + + self.db.flush_wal(true)?; + Ok(()) + } + + /// Converts an index (u64) into a fixed-size byte array for use as a RocksDB key. + #[inline(always)] + fn index_db_key(index: u64) -> [u8; 8] { + index.to_be_bytes() + } + + /// Converts a `NodeIndex` (for a subtree root) into a `KeyBytes` for use as a RocksDB key. + /// The `KeyBytes` is a wrapper around an 8-byte value with a variable-length prefix. + #[inline(always)] + fn subtree_db_key(index: NodeIndex) -> KeyBytes { + let keep = match index.depth() { + 24 => 3, + 32 => 4, + 40 => 5, + 48 => 6, + 56 => 7, + d => panic!("unsupported depth {d}"), + }; + KeyBytes::new(index.value(), keep) + } + + /// Retrieves a handle to a RocksDB column family by its name. + /// + /// # Errors + /// Returns `StorageError::Unsupported` if the column family with the given `name` does not + /// exist. 
+ fn cf_handle(&self, name: &str) -> Result<&rocksdb::ColumnFamily, StorageError> { + self.db + .cf_handle(name) + .ok_or_else(|| StorageError::Unsupported(format!("unknown column family `{name}`"))) + } + + /* helper: CF handle from NodeIndex ------------------------------------- */ + #[inline(always)] + fn subtree_cf(&self, index: NodeIndex) -> &rocksdb::ColumnFamily { + let name = cf_for_depth(index.depth()); + self.cf_handle(name).expect("CF handle missing") + } +} + +impl SmtStorage for RocksDbStorage { + /// Retrieves the SMT root hash from the `METADATA_CF` column family. + /// + /// # Errors + /// - `StorageError::Backend`: If the metadata column family is missing or a RocksDB error + /// occurs. + /// - `StorageError::DeserializationError`: If the retrieved root hash bytes cannot be + /// deserialized. + fn get_root(&self) -> Result, StorageError> { + let cf = self.cf_handle(METADATA_CF)?; + match self.db.get_cf(cf, ROOT_KEY)? { + Some(bytes) => { + let digest = Word::read_from_bytes(&bytes)?; + Ok(Some(digest)) + }, + None => Ok(None), + } + } + + /// Stores the SMT root hash in the `METADATA_CF` column family. + /// + /// # Errors + /// - `StorageError::Backend`: If the metadata column family is missing or a RocksDB error + /// occurs. + fn set_root(&self, root: Word) -> Result<(), StorageError> { + let cf = self.cf_handle(METADATA_CF)?; + self.db.put_cf(cf, ROOT_KEY, root.to_bytes())?; + Ok(()) + } + + /// Retrieves the total count of non-empty leaves from the `METADATA_CF` column family. + /// Returns 0 if the count is not found. + /// + /// # Errors + /// - `StorageError::Backend`: If the metadata column family is missing or a RocksDB error + /// occurs. + /// - `StorageError::BadValueLen`: If the retrieved count bytes are invalid. 
+ fn leaf_count(&self) -> Result { + let cf = self.cf_handle(METADATA_CF)?; + self.db.get_cf(cf, LEAF_COUNT_KEY)?.map_or(Ok(0), |bytes| { + let arr: [u8; 8] = + bytes.as_slice().try_into().map_err(|_| StorageError::BadValueLen { + what: "leaf count", + expected: 8, + found: bytes.len(), + })?; + Ok(usize::from_be_bytes(arr)) + }) + } + + /// Retrieves the total count of key-value entries from the `METADATA_CF` column family. + /// Returns 0 if the count is not found. + /// + /// # Errors + /// - `StorageError::Backend`: If the metadata column family is missing or a RocksDB error + /// occurs. + /// - `StorageError::BadValueLen`: If the retrieved count bytes are invalid. + fn entry_count(&self) -> Result { + let cf = self.cf_handle(METADATA_CF)?; + self.db.get_cf(cf, ENTRY_COUNT_KEY)?.map_or(Ok(0), |bytes| { + let arr: [u8; 8] = + bytes.as_slice().try_into().map_err(|_| StorageError::BadValueLen { + what: "entry count", + expected: 8, + found: bytes.len(), + })?; + Ok(usize::from_be_bytes(arr)) + }) + } + + /// Inserts a key-value pair into the SMT leaf at the specified logical `index`. + /// + /// This operation involves: + /// 1. Retrieving the current leaf (if any) at `index`. + /// 2. Inserting the new key-value pair into the leaf. + /// 3. Updating the leaf and entry counts in the metadata column family. + /// 4. Writing all changes (leaf data, counts) to RocksDB in a single batch. + /// + /// Note: This only updates the leaf. Callers are responsible for recomputing and + /// persisting the corresponding inner nodes. + /// + /// # Errors + /// - `StorageError::Backend`: If column families are missing or a RocksDB error occurs. + /// - `StorageError::DeserializationError`: If existing leaf data is corrupt. + fn insert_value( + &self, + index: u64, + key: Word, + value: Word, + ) -> Result, StorageError> { + debug_assert_ne!(value, EMPTY_WORD); + + let mut batch = WriteBatch::default(); + + // Fetch initial counts. 
+ let mut current_leaf_count = self.leaf_count()?; + let mut current_entry_count = self.entry_count()?; + + let leaves_cf = self.cf_handle(LEAVES_CF)?; + let db_key = Self::index_db_key(index); + + let maybe_leaf = self.get_leaf(index)?; + + let value_to_return: Option = match maybe_leaf { + Some(mut existing_leaf) => { + let old_value = existing_leaf.insert(key, value).expect("Failed to insert value"); + // Determine if the overall SMT entry_count needs to change. + // entry_count increases if: + // 1. The key was not present in this leaf before (`old_value` is `None`). + // 2. The key was present but held `EMPTY_WORD` (`old_value` is + // `Some(EMPTY_WORD)`). + if old_value.is_none_or(|old_v| old_v == EMPTY_WORD) { + current_entry_count += 1; + } + // current_leaf_count does not change because the leaf itself already existed. + batch.put_cf(leaves_cf, db_key, existing_leaf.to_bytes()); + old_value + }, + None => { + // Leaf at `index` does not exist, so create a new one. + let new_leaf = SmtLeaf::Single((key, value)); + // A new leaf is created. + current_leaf_count += 1; + // This new leaf contains one new SMT entry. + current_entry_count += 1; + batch.put_cf(leaves_cf, db_key, new_leaf.to_bytes()); + // No previous value, as the leaf (and thus the key in it) was new. + None + }, + }; + + // Add updated metadata counts to the batch. + let metadata_cf = self.cf_handle(METADATA_CF)?; + batch.put_cf(metadata_cf, LEAF_COUNT_KEY, current_leaf_count.to_be_bytes()); + batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, current_entry_count.to_be_bytes()); + + // Atomically write all changes (leaf data and metadata counts). + self.db.write(batch)?; + + Ok(value_to_return) + } + + /// Removes a key-value pair from the SMT leaf at the specified logical `index`. + /// + /// This operation involves: + /// 1. Retrieving the leaf at `index`. + /// 2. Removing the `key` from the leaf. If the leaf becomes empty, it's deleted from RocksDB. + /// 3. 
Updating the leaf and entry counts in the metadata column family. + /// 4. Writing all changes (leaf data/deletion, counts) to RocksDB in a single batch. + /// + /// Returns `Ok(None)` if the leaf at `index` does not exist or the `key` is not found. + /// + /// Note: This only updates the leaf. Callers are responsible for recomputing and + /// persisting the corresponding inner nodes. + /// + /// # Errors + /// - `StorageError::Backend`: If column families are missing or a RocksDB error occurs. + /// - `StorageError::DeserializationError`: If existing leaf data is corrupt. + fn remove_value(&self, index: u64, key: Word) -> Result, StorageError> { + let Some(mut leaf) = self.get_leaf(index)? else { + return Ok(None); + }; + + let mut batch = WriteBatch::default(); + let cf = self.cf_handle(LEAVES_CF)?; + let metadata_cf = self.cf_handle(METADATA_CF)?; + let db_key = Self::index_db_key(index); + let mut entry_count = self.entry_count()?; + let mut leaf_count = self.leaf_count()?; + + let (current_value, is_empty) = leaf.remove(key); + if let Some(current_value) = current_value + && current_value != EMPTY_WORD + { + entry_count -= 1; + } + if is_empty { + leaf_count -= 1; + batch.delete_cf(cf, db_key); + } else { + batch.put_cf(cf, db_key, leaf.to_bytes()); + } + batch.put_cf(metadata_cf, LEAF_COUNT_KEY, leaf_count.to_be_bytes()); + batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, entry_count.to_be_bytes()); + self.db.write(batch)?; + Ok(current_value) + } + + /// Retrieves a single SMT leaf node by its logical `index` from the `LEAVES_CF` column family. + /// + /// # Errors + /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. + /// - `StorageError::DeserializationError`: If the retrieved leaf data is corrupt. + fn get_leaf(&self, index: u64) -> Result, StorageError> { + let cf = self.cf_handle(LEAVES_CF)?; + let key = Self::index_db_key(index); + match self.db.get_cf(cf, key)? 
{ + Some(bytes) => { + let leaf = SmtLeaf::read_from_bytes(&bytes)?; + Ok(Some(leaf)) + }, + None => Ok(None), + } + } + + /// Sets or updates multiple SMT leaf nodes in the `LEAVES_CF` column family. + /// + /// This method performs a batch write to RocksDB. It also updates the global + /// leaf and entry counts in the `METADATA_CF` based on the provided `leaves` map, + /// overwriting any previous counts. + /// + /// Note: This method assumes the provided `leaves` map represents the entirety + /// of leaves to be stored or that counts are being explicitly reset. + /// Note: This only updates the leaves. Callers are responsible for recomputing and + /// persisting the corresponding inner nodes. + /// + /// # Errors + /// - `StorageError::Backend`: If column families are missing or a RocksDB error occurs. + fn set_leaves(&self, leaves: Map) -> Result<(), StorageError> { + let cf = self.cf_handle(LEAVES_CF)?; + let leaf_count: usize = leaves.len(); + let entry_count: usize = leaves.values().map(|leaf| leaf.entries().len()).sum(); + let mut batch = WriteBatch::default(); + for (idx, leaf) in leaves { + let key = Self::index_db_key(idx); + let value = leaf.to_bytes(); + batch.put_cf(cf, key, &value); + } + let metadata_cf = self.cf_handle(METADATA_CF)?; + batch.put_cf(metadata_cf, LEAF_COUNT_KEY, leaf_count.to_be_bytes()); + batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, entry_count.to_be_bytes()); + self.db.write(batch)?; + Ok(()) + } + + /// Removes a single SMT leaf node by its logical `index` from the `LEAVES_CF` column family. + /// + /// Important: This method currently *does not* update the global leaf and entry counts + /// in the metadata. Callers are responsible for managing these counts separately + /// if using this method directly, or preferably use `apply` or `remove_value` which handle + /// counts. + /// + /// Note: This only removes the leaf. Callers are responsible for recomputing and + /// persisting the corresponding inner nodes. 
+ /// + /// # Errors + /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. + /// - `StorageError::DeserializationError`: If the retrieved (to be returned) leaf data is + /// corrupt. + fn remove_leaf(&self, index: u64) -> Result, StorageError> { + let key = Self::index_db_key(index); + let cf = self.cf_handle(LEAVES_CF)?; + let old_bytes = self.db.get_cf(cf, key)?; + self.db.delete_cf(cf, key)?; + Ok(old_bytes + .map(|bytes| SmtLeaf::read_from_bytes(&bytes).expect("failed to deserialize leaf"))) + } + + /// Retrieves multiple SMT leaf nodes by their logical `indices` using RocksDB's `multi_get_cf`. + /// + /// # Errors + /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs. + /// - `StorageError::DeserializationError`: If any retrieved leaf data is corrupt. + fn get_leaves(&self, indices: &[u64]) -> Result>, StorageError> { + let cf = self.cf_handle(LEAVES_CF)?; + let db_keys: Vec<[u8; 8]> = indices.iter().map(|&idx| Self::index_db_key(idx)).collect(); + let results = self.db.multi_get_cf(db_keys.iter().map(|k| (cf, k.as_ref()))); + + results + .into_iter() + .map(|result| match result { + Ok(Some(bytes)) => Ok(Some(SmtLeaf::read_from_bytes(&bytes)?)), + Ok(None) => Ok(None), + Err(e) => Err(e.into()), + }) + .collect() + } + + /// Returns true if the storage has any leaves. + /// + /// # Errors + /// Returns `StorageError` if the storage read operation fails. + fn has_leaves(&self) -> Result { + Ok(self.leaf_count()? > 0) + } + + /// Batch-retrieves multiple subtrees from RocksDB by their node indices. + /// + /// This method groups requests by subtree depth into column family buckets, + /// then performs parallel `multi_get` operations to efficiently retrieve + /// all subtrees. Results are deserialized and placed in the same order as + /// the input indices. + /// + /// Note: Retrieval is performed in parallel. 
If multiple errors occur (e.g., + /// deserialization or backend errors), only the first one encountered is returned. + /// Other errors will be discarded. + /// + /// # Parameters + /// - `indices`: A slice of subtree root indices to retrieve. + /// + /// # Returns + /// - A `Vec>` where each index corresponds to the original input. + /// - `Ok(...)` if all fetches succeed. + /// - `Err(StorageError)` if any RocksDB access or deserialization fails. + fn get_subtree(&self, index: NodeIndex) -> Result, StorageError> { + let cf = self.subtree_cf(index); + let key = Self::subtree_db_key(index); + match self.db.get_cf(cf, key)? { + Some(bytes) => { + let subtree = Subtree::from_vec(index, &bytes)?; + Ok(Some(subtree)) + }, + None => Ok(None), + } + } + + /// Batch-retrieves multiple subtrees from RocksDB by their node indices. + /// + /// This method groups requests by subtree depth into column family buckets, + /// then performs parallel `multi_get` operations to efficiently retrieve + /// all subtrees. Results are deserialized and placed in the same order as + /// the input indices. + /// + /// # Parameters + /// - `indices`: A slice of subtree root indices to retrieve. + /// + /// # Returns + /// - A `Vec>` where each index corresponds to the original input. + /// - `Ok(...)` if all fetches succeed. + /// - `Err(StorageError)` if any RocksDB access or deserialization fails. 
+ fn get_subtrees(&self, indices: &[NodeIndex]) -> Result>, StorageError> { + use rayon::prelude::*; + + let mut depth_buckets: [Vec<(usize, NodeIndex)>; 5] = Default::default(); + + for (original_index, &node_index) in indices.iter().enumerate() { + let depth = node_index.depth(); + let bucket_index = match depth { + 56 => 0, + 48 => 1, + 40 => 2, + 32 => 3, + 24 => 4, + _ => { + return Err(StorageError::Unsupported(format!( + "unsupported subtree depth {depth}" + ))); + }, + }; + depth_buckets[bucket_index].push((original_index, node_index)); + } + let mut results = vec![None; indices.len()]; + + // Process depth buckets in parallel + let bucket_results: Result, StorageError> = depth_buckets + .into_par_iter() + .enumerate() + .filter(|(_, bucket)| !bucket.is_empty()) + .map( + |(bucket_index, bucket)| -> Result)>, StorageError> { + let depth = LargeSmt::::SUBTREE_DEPTHS[bucket_index]; + let cf = self.cf_handle(cf_for_depth(depth))?; + let keys: Vec<_> = + bucket.iter().map(|(_, idx)| Self::subtree_db_key(*idx)).collect(); + + let db_results = self.db.multi_get_cf(keys.iter().map(|k| (cf, k.as_ref()))); + + // Process results for this bucket + bucket + .into_iter() + .zip(db_results) + .map(|((original_index, node_index), db_result)| { + let subtree = match db_result { + Ok(Some(bytes)) => Some(Subtree::from_vec(node_index, &bytes)?), + Ok(None) => None, + Err(e) => return Err(e.into()), + }; + Ok((original_index, subtree)) + }) + .collect() + }, + ) + .collect(); + + // Flatten results and place them in correct positions + for bucket_result in bucket_results? { + for (original_index, subtree) in bucket_result { + results[original_index] = subtree; + } + } + + Ok(results) + } + + /// Stores a single subtree in RocksDB and optionally updates the depth-24 root cache. + /// + /// The subtree is serialized and written to its corresponding column family. 
+ /// If it's a depth-24 subtree, the root node’s hash is also stored in the + /// dedicated `DEPTH_24_CF` cache to support top-level reconstruction. + /// + /// # Parameters + /// - `subtree`: A reference to the subtree to be stored. + /// + /// # Errors + /// - Returns `StorageError` if column family lookup, serialization, or the write operation + /// fails. + fn set_subtree(&self, subtree: &Subtree) -> Result<(), StorageError> { + let subtrees_cf = self.subtree_cf(subtree.root_index()); + let mut batch = WriteBatch::default(); + + let key = Self::subtree_db_key(subtree.root_index()); + let value = subtree.to_vec(); + batch.put_cf(subtrees_cf, key, value); + + // Also update level 24 hash cache if this is a level 24 subtree + if subtree.root_index().depth() == IN_MEMORY_DEPTH { + let root_hash = subtree + .get_inner_node(subtree.root_index()) + .ok_or_else(|| StorageError::Unsupported("Subtree root node not found".into()))? + .hash(); + + let depth24_cf = self.cf_handle(DEPTH_24_CF)?; + let hash_key = Self::index_db_key(subtree.root_index().value()); + batch.put_cf(depth24_cf, hash_key, root_hash.to_bytes()); + } + + self.db.write(batch)?; + Ok(()) + } + + /// Bulk-writes subtrees to storage (bypassing WAL). + /// + /// This method writes a vector of serialized `Subtree` objects directly to their + /// corresponding RocksDB column families based on their root index. + /// + /// ⚠️ **Warning:** This function should only be used during **initial SMT construction**. + /// It disables the WAL, meaning writes are **not crash-safe** and can result in data loss + /// if the process terminates unexpectedly. + /// + /// # Parameters + /// - `subtrees`: A vector of `Subtree` objects to be serialized and persisted. + /// + /// # Errors + /// - Returns `StorageError::Backend` if any column family lookup or RocksDB write fails. 
+ fn set_subtrees(&self, subtrees: Vec) -> Result<(), StorageError> { + let depth24_cf = self.cf_handle(DEPTH_24_CF)?; + let mut batch = WriteBatch::default(); + + for subtree in subtrees { + let subtrees_cf = self.subtree_cf(subtree.root_index()); + let key = Self::subtree_db_key(subtree.root_index()); + let value = subtree.to_vec(); + batch.put_cf(subtrees_cf, key, value); + + if subtree.root_index().depth() == IN_MEMORY_DEPTH + && let Some(root_node) = subtree.get_inner_node(subtree.root_index()) + { + let hash_key = Self::index_db_key(subtree.root_index().value()); + batch.put_cf(depth24_cf, hash_key, root_node.hash().to_bytes()); + } + } + + self.db.write(batch)?; + Ok(()) + } + + /// Removes a single SMT Subtree from storage, identified by its root `NodeIndex`. + /// + /// # Errors + /// - `StorageError::Backend`: If the subtrees column family is missing or a RocksDB error + /// occurs. + fn remove_subtree(&self, index: NodeIndex) -> Result<(), StorageError> { + let subtrees_cf = self.subtree_cf(index); + let mut batch = WriteBatch::default(); + + let key = Self::subtree_db_key(index); + batch.delete_cf(subtrees_cf, key); + + // Also remove level 24 hash cache if this is a level 24 subtree + if index.depth() == IN_MEMORY_DEPTH { + let depth24_cf = self.cf_handle(DEPTH_24_CF)?; + let hash_key = Self::index_db_key(index.value()); + batch.delete_cf(depth24_cf, hash_key); + } + + self.db.write(batch)?; + Ok(()) + } + + /// Retrieves a single inner node (non-leaf node) from within a Subtree. + /// + /// This method is intended for accessing nodes at depths greater than or equal to + /// `IN_MEMORY_DEPTH`. It first finds the appropriate Subtree containing the `index`, then + /// delegates to `Subtree::get_inner_node()`. + /// + /// # Errors + /// - `StorageError::Backend`: If `index.depth() < IN_MEMORY_DEPTH`, or if RocksDB errors occur. + /// - `StorageError::Value`: If the containing Subtree data is corrupt. 
+ fn get_inner_node(&self, index: NodeIndex) -> Result, StorageError> { + if index.depth() < IN_MEMORY_DEPTH { + return Err(StorageError::Unsupported( + "Cannot get inner node from upper part of the tree".into(), + )); + } + let subtree_root_index = Subtree::find_subtree_root(index); + Ok(self + .get_subtree(subtree_root_index)? + .and_then(|subtree| subtree.get_inner_node(index))) + } + + /// Sets or updates a single inner node (non-leaf node) within a Subtree. + /// + /// This method is intended for `index.depth() >= IN_MEMORY_DEPTH`. + /// If the target Subtree does not exist, it is created. The `node` is then + /// inserted into the Subtree, and the modified Subtree is written back to storage. + /// + /// # Errors + /// - `StorageError::Backend`: If `index.depth() < IN_MEMORY_DEPTH`, or if RocksDB errors occur. + /// - `StorageError::Value`: If existing Subtree data is corrupt. + fn set_inner_node( + &self, + index: NodeIndex, + node: InnerNode, + ) -> Result, StorageError> { + if index.depth() < IN_MEMORY_DEPTH { + return Err(StorageError::Unsupported( + "Cannot set inner node in upper part of the tree".into(), + )); + } + + let subtree_root_index = Subtree::find_subtree_root(index); + let mut subtree = self + .get_subtree(subtree_root_index)? + .unwrap_or_else(|| Subtree::new(subtree_root_index)); + let old_node = subtree.insert_inner_node(index, node); + self.set_subtree(&subtree)?; + Ok(old_node) + } + + /// Removes a single inner node (non-leaf node) from within a Subtree. + /// + /// This method is intended for `index.depth() >= IN_MEMORY_DEPTH`. + /// If the Subtree becomes empty after removing the node, the Subtree itself + /// is removed from storage. + /// + /// # Errors + /// - `StorageError::Backend`: If `index.depth() < IN_MEMORY_DEPTH`, or if RocksDB errors occur. + /// - `StorageError::Value`: If existing Subtree data is corrupt. 
+ fn remove_inner_node(&self, index: NodeIndex) -> Result, StorageError> { + if index.depth() < IN_MEMORY_DEPTH { + return Err(StorageError::Unsupported( + "Cannot remove inner node from upper part of the tree".into(), + )); + } + + let subtree_root_index = Subtree::find_subtree_root(index); + self.get_subtree(subtree_root_index) + .and_then(|maybe_subtree| match maybe_subtree { + Some(mut subtree) => { + let old_node = subtree.remove_inner_node(index); + let db_operation_result = if subtree.is_empty() { + self.remove_subtree(subtree_root_index) + } else { + self.set_subtree(&subtree) + }; + db_operation_result.map(|_| old_node) + }, + None => Ok(None), + }) + } + + /// Applies a batch of `StorageUpdates` atomically to the RocksDB backend. + /// + /// This is the primary method for persisting changes to the SMT. It constructs a single + /// RocksDB `WriteBatch` containing all specified changes: + /// - Leaf updates/deletions in `LEAVES_CF`. + /// - Subtree updates/deletions in `SUBTREE_24_CF`, `SUBTREE_32_CF`, `SUBTREE_40_CF`, + /// `SUBTREE_48_CF`, `SUBTREE_56_CF`. + /// - Updates to leaf and entry counts in `METADATA_CF` based on `leaf_count_delta` and + /// `entry_count_delta`. + /// - Sets the new SMT root in `METADATA_CF`. + /// + /// All operations in the batch are applied atomically by RocksDB. + /// + /// # Errors + /// - `StorageError::Backend`: If any column family is missing or a RocksDB write error occurs. 
+ fn apply(&self, updates: StorageUpdates) -> Result<(), StorageError> { + use rayon::prelude::*; + + let mut batch = WriteBatch::default(); + + let leaves_cf = self.cf_handle(LEAVES_CF)?; + let metadata_cf = self.cf_handle(METADATA_CF)?; + let depth24_cf = self.cf_handle(DEPTH_24_CF)?; + + let StorageUpdateParts { + leaf_updates, + subtree_updates, + new_root, + leaf_count_delta, + entry_count_delta, + } = updates.into_parts(); + + // Process leaf updates + for (index, maybe_leaf) in leaf_updates { + let key = Self::index_db_key(index); + match maybe_leaf { + Some(leaf) => batch.put_cf(leaves_cf, key, leaf.to_bytes()), + None => batch.delete_cf(leaves_cf, key), + } + } + + // Helper for depth 24 operations + let is_depth_24 = |index: NodeIndex| index.depth() == IN_MEMORY_DEPTH; + + // Parallel preparation of subtree operations + let subtree_ops: Result, StorageError> = subtree_updates + .into_par_iter() + .map(|update| -> Result<_, StorageError> { + let (index, maybe_bytes, depth24_op) = match update { + SubtreeUpdate::Store { index, subtree } => { + let bytes = subtree.to_vec(); + let depth24_op = is_depth_24(index) + .then(|| subtree.get_inner_node(index)) + .flatten() + .map(|root_node| { + let hash_key = Self::index_db_key(index.value()); + (hash_key, Some(root_node.hash().to_bytes())) + }); + (index, Some(bytes), depth24_op) + }, + SubtreeUpdate::Delete { index } => { + let depth24_op = is_depth_24(index).then(|| { + let hash_key = Self::index_db_key(index.value()); + (hash_key, None) + }); + (index, None, depth24_op) + }, + }; + + let key = Self::subtree_db_key(index); + let subtrees_cf = self.subtree_cf(index); + + Ok((subtrees_cf, key, maybe_bytes, depth24_op)) + }) + .collect(); + + // Sequential batch building + for (subtrees_cf, key, maybe_bytes, depth24_op) in subtree_ops? 
{ + match maybe_bytes { + Some(bytes) => batch.put_cf(subtrees_cf, key, bytes), + None => batch.delete_cf(subtrees_cf, key), + } + + if let Some((hash_key, maybe_hash_bytes)) = depth24_op { + match maybe_hash_bytes { + Some(hash_bytes) => batch.put_cf(depth24_cf, hash_key, hash_bytes), + None => batch.delete_cf(depth24_cf, hash_key), + } + } + } + + if leaf_count_delta != 0 || entry_count_delta != 0 { + let current_leaf_count = self.leaf_count()?; + let current_entry_count = self.entry_count()?; + + let new_leaf_count = current_leaf_count.saturating_add_signed(leaf_count_delta); + let new_entry_count = current_entry_count.saturating_add_signed(entry_count_delta); + + batch.put_cf(metadata_cf, LEAF_COUNT_KEY, new_leaf_count.to_be_bytes()); + batch.put_cf(metadata_cf, ENTRY_COUNT_KEY, new_entry_count.to_be_bytes()); + } + + batch.put_cf(metadata_cf, ROOT_KEY, new_root.to_bytes()); + + let mut write_opts = rocksdb::WriteOptions::default(); + // Disable immediate WAL sync to disk for better performance + write_opts.set_sync(false); + self.db.write_opt(batch, &write_opts)?; + + Ok(()) + } + + /// Returns an iterator over all (logical u64 index, `SmtLeaf`) pairs in the `LEAVES_CF`. + /// + /// The iterator uses a RocksDB snapshot for consistency and iterates in lexicographical + /// order of the keys (leaf indices). Errors during iteration (e.g., deserialization issues) + /// cause the iterator to skip the problematic item and attempt to continue. + /// + /// # Errors + /// - `StorageError::Backend`: If the leaves column family is missing or a RocksDB error occurs + /// during iterator creation. 
+ fn iter_leaves(&self) -> Result + '_>, StorageError> { + let cf = self.cf_handle(LEAVES_CF)?; + let mut read_opts = ReadOptions::default(); + read_opts.set_total_order_seek(true); + let db_iter = self.db.iterator_cf_opt(cf, read_opts, IteratorMode::Start); + + Ok(Box::new(RocksDbDirectLeafIterator { iter: db_iter })) + } + + /// Returns an iterator over all `Subtree` instances across all subtree column families. + /// + /// The iterator uses a RocksDB snapshot and iterates in lexicographical order of keys + /// (subtree root NodeIndex) across all depth column families (24, 32, 40, 48, 56). + /// Errors during iteration (e.g., deserialization issues) cause the iterator to skip + /// the problematic item and attempt to continue. + /// + /// # Errors + /// - `StorageError::Backend`: If any subtree column family is missing or a RocksDB error occurs + /// during iterator creation. + fn iter_subtrees(&self) -> Result + '_>, StorageError> { + // All subtree column family names in order + const SUBTREE_CFS: [&str; 5] = + [SUBTREE_24_CF, SUBTREE_32_CF, SUBTREE_40_CF, SUBTREE_48_CF, SUBTREE_56_CF]; + + let mut cf_handles = Vec::new(); + for cf_name in SUBTREE_CFS { + cf_handles.push(self.cf_handle(cf_name)?); + } + + Ok(Box::new(RocksDbSubtreeIterator::new(&self.db, cf_handles))) + } + + /// Retrieves all depth 24 hashes for fast tree rebuilding. + /// + /// # Errors + /// - `StorageError::Backend`: If the depth24 column family is missing or a RocksDB error + /// occurs. + /// - `StorageError::Value`: If any hash bytes are corrupt. 
+ fn get_depth24(&self) -> Result, StorageError> { + let cf = self.cf_handle(DEPTH_24_CF)?; + let iter = self.db.iterator_cf(cf, IteratorMode::Start); + let mut hashes = Vec::new(); + + for item in iter { + let (key_bytes, value_bytes) = item?; + + let index = index_from_key_bytes(&key_bytes)?; + let hash = Word::read_from_bytes(&value_bytes)?; + + hashes.push((index, hash)); + } + + Ok(hashes) + } +} + +/// Syncs the RocksDB database to disk before dropping the storage. +/// +/// This ensures that all data is persisted to disk before the storage is dropped. +/// +/// # Panics +/// - If the RocksDB sync operation fails. +impl Drop for RocksDbStorage { + fn drop(&mut self) { + if let Err(e) = self.sync() { + panic!("failed to flush RocksDB on drop: {e}"); + } + } +} + +// ITERATORS +// -------------------------------------------------------------------------------------------- + +/// An iterator over leaves directly from RocksDB. +/// +/// Wraps a `DBIteratorWithThreadMode` and handles deserialization of keys to `u64` (leaf index) +/// and values to `SmtLeaf`. Skips items that fail to deserialize or if a RocksDB error occurs +/// for an item, attempting to continue iteration. +struct RocksDbDirectLeafIterator<'a> { + iter: DBIteratorWithThreadMode<'a, DB>, +} + +impl Iterator for RocksDbDirectLeafIterator<'_> { + type Item = (u64, SmtLeaf); + + fn next(&mut self) -> Option { + self.iter.find_map(|result| { + let (key_bytes, value_bytes) = result.ok()?; + let leaf_idx = index_from_key_bytes(&key_bytes).ok()?; + let leaf = SmtLeaf::read_from_bytes(&value_bytes).ok()?; + Some((leaf_idx, leaf)) + }) + } +} + +/// An iterator over subtrees from multiple RocksDB column families. +/// +/// Iterates through all subtree column families (24, 32, 40, 48, 56) sequentially. +/// When one column family is exhausted, it moves to the next one. 
+struct RocksDbSubtreeIterator<'a> { + db: &'a DB, + cf_handles: Vec<&'a rocksdb::ColumnFamily>, + current_cf_index: usize, + current_iter: Option>, +} + +impl<'a> RocksDbSubtreeIterator<'a> { + fn new(db: &'a DB, cf_handles: Vec<&'a rocksdb::ColumnFamily>) -> Self { + let mut iterator = Self { + db, + cf_handles, + current_cf_index: 0, + current_iter: None, + }; + iterator.advance_to_next_cf(); + iterator + } + + fn advance_to_next_cf(&mut self) { + if self.current_cf_index < self.cf_handles.len() { + let cf = self.cf_handles[self.current_cf_index]; + let mut read_opts = ReadOptions::default(); + read_opts.set_total_order_seek(true); + self.current_iter = Some(self.db.iterator_cf_opt(cf, read_opts, IteratorMode::Start)); + } else { + self.current_iter = None; + } + } + + fn try_next_from_iter( + iter: &mut DBIteratorWithThreadMode, + cf_index: usize, + ) -> Option { + iter.find_map(|result| { + let (key_bytes, value_bytes) = result.ok()?; + let depth = 24 + (cf_index * 8) as u8; + + let node_idx = subtree_root_from_key_bytes(&key_bytes, depth).ok()?; + let value_vec = value_bytes.into_vec(); + Subtree::from_vec(node_idx, &value_vec).ok() + }) + } +} + +impl Iterator for RocksDbSubtreeIterator<'_> { + type Item = Subtree; + + fn next(&mut self) -> Option { + loop { + let iter = self.current_iter.as_mut()?; + + // Try to get the next valid subtree from current iterator + if let Some(subtree) = Self::try_next_from_iter(iter, self.current_cf_index) { + return Some(subtree); + } + + // Current CF exhausted, advance to next + self.current_cf_index += 1; + self.advance_to_next_cf(); + + // If no more CFs, we're done + self.current_iter.as_ref()?; + } + } +} + +// ROCKSDB CONFIGURATION +// -------------------------------------------------------------------------------------------- + +/// Configuration for RocksDB storage used by the Sparse Merkle Tree implementation. 
+/// +/// This struct contains the essential configuration parameters needed to initialize +/// and optimize RocksDB for SMT storage operations. It provides sensible defaults +/// while allowing customization for specific performance requirements. +#[derive(Debug, Clone)] +pub struct RocksDbConfig { + /// The filesystem path where the RocksDB database will be stored. + /// + /// This should be a directory path that the application has read/write permissions for. + /// The database will create multiple files in this directory to store data, logs, and + /// metadata. + pub(crate) path: PathBuf, + + /// The size of the RocksDB block cache in bytes. + /// + /// This cache stores frequently accessed data blocks in memory to improve read performance. + /// Larger cache sizes generally improve read performance but consume more memory. + /// Default: 1GB (1 << 30 bytes) + pub(crate) cache_size: usize, + + /// The maximum number of files that RocksDB can have open simultaneously. + /// + /// This setting affects both memory usage and the number of file descriptors used by the + /// process. Higher values may improve performance for databases with many SST files but + /// increase resource usage. Default: 512 files + pub(crate) max_open_files: i32, +} + +impl RocksDbConfig { + /// Creates a new RocksDbConfig with the given database path and default settings. + /// + /// # Arguments + /// * `path` - The filesystem path where the RocksDB database will be stored. This can be any + /// type that converts into a `PathBuf`. + /// + /// # Default Settings + /// * `cache_size`: 1GB (1,073,741,824 bytes) + /// * `max_open_files`: 512 + /// + /// # Examples + /// ``` + /// use miden_crypto::merkle::smt::RocksDbConfig; + /// + /// let config = RocksDbConfig::new("/path/to/database"); + /// ``` + pub fn new>(path: P) -> Self { + Self { + path: path.into(), + cache_size: 1 << 30, + max_open_files: 512, + } + } + + /// Sets the block cache size for RocksDB. 
+ /// + /// The block cache stores frequently accessed data blocks in memory to improve read + /// performance. Larger cache sizes generally improve read performance but consume more + /// memory. + /// + /// # Arguments + /// * `size` - The cache size in bytes. + /// + /// # Examples + /// ``` + /// use miden_crypto::merkle::smt::RocksDbConfig; + /// + /// let config = RocksDbConfig::new("/path/to/database") + /// .with_cache_size(2 * 1024 * 1024 * 1024); // 2GB cache + /// ``` + pub fn with_cache_size(mut self, size: usize) -> Self { + self.cache_size = size; + self + } + + /// Sets the maximum number of files that RocksDB can have open simultaneously. + /// + /// This setting affects both memory usage and the number of file descriptors used by the + /// process. Higher values may improve performance for databases with many SST files but + /// increase resource usage. + /// + /// # Arguments + /// * `count` - The maximum number of open files. Must be positive. + /// + /// # Examples + /// ``` + /// use miden_crypto::merkle::smt::RocksDbConfig; + /// + /// let config = RocksDbConfig::new("/path/to/database") + /// .with_max_open_files(1024); // Allow up to 1024 open files + /// ``` + pub fn with_max_open_files(mut self, count: i32) -> Self { + self.max_open_files = count; + self + } +} + +// SUBTREE DB KEY +// -------------------------------------------------------------------------------------------- + +/// Compact key wrapper for variable-length subtree prefixes. +/// +/// * `bytes` always holds the big-endian 8-byte value. +/// * `len` is how many leading bytes are significant (3-7). 
+#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)] +pub(crate) struct KeyBytes { + bytes: [u8; 8], + len: u8, +} + +impl KeyBytes { + #[inline(always)] + pub fn new(value: u64, keep: usize) -> Self { + debug_assert!((3..=7).contains(&keep)); + let bytes = value.to_be_bytes(); + debug_assert!(bytes[..8 - keep].iter().all(|&b| b == 0)); + Self { bytes, len: keep as u8 } + } + + #[inline(always)] + pub fn as_slice(&self) -> &[u8] { + &self.bytes[8 - self.len as usize..] + } +} + +impl AsRef<[u8]> for KeyBytes { + #[inline(always)] + fn as_ref(&self) -> &[u8] { + self.as_slice() + } +} + +// HELPERS +// -------------------------------------------------------------------------------------------- + +/// Deserializes an index (u64) from a RocksDB key byte slice. +/// Expects `key_bytes` to be exactly 8 bytes long. +/// +/// # Errors +/// - `StorageError::BadKeyLen`: If `key_bytes` is not 8 bytes long or conversion fails. +fn index_from_key_bytes(key_bytes: &[u8]) -> Result { + if key_bytes.len() != 8 { + return Err(StorageError::BadKeyLen { expected: 8, found: key_bytes.len() }); + } + let mut arr = [0u8; 8]; + arr.copy_from_slice(key_bytes); + Ok(u64::from_be_bytes(arr)) +} + +/// Reconstructs a `NodeIndex` from the variable-length subtree key stored in RocksDB. +/// +/// * `key_bytes` is the big-endian tail of the 64-bit value: +/// - depth 56 → 7 bytes +/// - depth 48 → 6 bytes +/// - depth 40 → 5 bytes +/// - depth 32 → 4 bytes +/// - depth 24 → 3 bytes +/// +/// # Errors +/// * `StorageError::Unsupported` - `depth` is not one of 24/32/40/48/56. +/// * `StorageError::DeserializationError` - `key_bytes.len()` does not match the length required by +/// `depth`. 
+#[inline(always)] +fn subtree_root_from_key_bytes(key_bytes: &[u8], depth: u8) -> Result { + let expected = match depth { + 24 => 3, + 32 => 4, + 40 => 5, + 48 => 6, + 56 => 7, + d => return Err(StorageError::Unsupported(format!("unsupported subtree depth {d}"))), + }; + + if key_bytes.len() != expected { + return Err(StorageError::BadSubtreeKeyLen { depth, expected, found: key_bytes.len() }); + } + let mut buf = [0u8; 8]; + buf[8 - expected..].copy_from_slice(key_bytes); + let value = u64::from_be_bytes(buf); + Ok(NodeIndex::new_unchecked(depth, value)) +} + +/// Helper that maps an SMT depth to its column family. +#[inline(always)] +fn cf_for_depth(depth: u8) -> &'static str { + match depth { + 24 => SUBTREE_24_CF, + 32 => SUBTREE_32_CF, + 40 => SUBTREE_40_CF, + 48 => SUBTREE_48_CF, + 56 => SUBTREE_56_CF, + _ => panic!("unsupported subtree depth: {depth}"), + } +} + +impl From for StorageError { + fn from(e: rocksdb::Error) -> Self { + StorageError::Backend(Box::new(e)) + } +} diff --git a/miden-crypto/src/merkle/smt/large/subtree/mod.rs b/miden-crypto/src/merkle/smt/large/subtree/mod.rs index b669407f07..bc7348fb3a 100644 --- a/miden-crypto/src/merkle/smt/large/subtree/mod.rs +++ b/miden-crypto/src/merkle/smt/large/subtree/mod.rs @@ -12,7 +12,7 @@ pub use error::SubtreeError; #[cfg(test)] mod tests; -/// Represents a complete 8-depth subtree that is serialized into a single storage entry. +/// Represents a complete 8-depth subtree that is serialized into a single RocksDB entry. /// /// ### What is stored /// - `nodes` tracks only **non-empty inner nodes** of this subtree (i.e., nodes for which at least diff --git a/miden-crypto/src/merkle/store/mod.rs b/miden-crypto/src/merkle/store/mod.rs index 5d0a379df2..9d0fe575ad 100644 --- a/miden-crypto/src/merkle/store/mod.rs +++ b/miden-crypto/src/merkle/store/mod.rs @@ -192,7 +192,7 @@ impl MerkleStore { /// existence verification is needed. 
pub fn has_path(&self, root: Word, index: NodeIndex) -> bool { // check if the root exists - if !self.nodes.contains_key(&root) { + if self.nodes.get(&root).is_none() { return false; } diff --git a/miden-crypto/tests/rocksdb_large_smt.rs b/miden-crypto/tests/rocksdb_large_smt.rs new file mode 100644 index 0000000000..e677b918ac --- /dev/null +++ b/miden-crypto/tests/rocksdb_large_smt.rs @@ -0,0 +1,147 @@ +use miden_crypto::{ + EMPTY_WORD, Felt, ONE, WORD_SIZE, Word, + merkle::{ + InnerNodeInfo, + smt::{LargeSmt, RocksDbConfig, RocksDbStorage}, + }, +}; +use tempfile::TempDir; + +fn setup_storage() -> (RocksDbStorage, TempDir) { + let temp_dir = tempfile::Builder::new() + .prefix("test_smt_rocksdb_") + .tempdir() + .expect("Failed to create temporary directory for RocksDB test"); + + let db_path = temp_dir.path().to_path_buf(); + + let storage = RocksDbStorage::open(RocksDbConfig::new(db_path)) + .expect("Failed to open RocksDbStorage in temporary directory"); + (storage, temp_dir) +} + +fn generate_entries(pair_count: usize) -> Vec<(Word, Word)> { + (0..pair_count) + .map(|i| { + let key = Word::new([ONE, ONE, Felt::new(i as u64), Felt::new(i as u64 % 1000)]); + let value = Word::new([ONE, ONE, ONE, Felt::new(i as u64)]); + (key, value) + }) + .collect() +} + +#[test] +fn rocksdb_sanity_insert_and_get() { + let (storage, _tmp) = setup_storage(); + let mut smt = LargeSmt::::new(storage).unwrap(); + + let key = Word::new([ONE, ONE, ONE, ONE]); + let val = Word::new([ONE; WORD_SIZE]); + + let prev = smt.insert(key, val).unwrap(); + assert_eq!(prev, EMPTY_WORD); + assert_eq!(smt.get_value(&key), val); +} + +#[test] +fn rocksdb_persistence_reopen() { + let entries = generate_entries(1000); + + let (initial_storage, temp_dir_guard) = setup_storage(); + let db_path = temp_dir_guard.path().to_path_buf(); + + let smt = LargeSmt::::with_entries(initial_storage, entries).unwrap(); + let root = smt.root(); + + let mut inner_nodes: Vec = smt.inner_nodes().unwrap().collect(); + 
inner_nodes.sort_by_key(|info| info.value); + drop(smt); + + let reopened_storage = RocksDbStorage::open(RocksDbConfig::new(db_path)).unwrap(); + let smt = LargeSmt::::new(reopened_storage).unwrap(); + + let mut inner_nodes_2: Vec = smt.inner_nodes().unwrap().collect(); + inner_nodes_2.sort_by_key(|info| info.value); + + assert_eq!(inner_nodes.len(), inner_nodes_2.len()); + assert_eq!(inner_nodes, inner_nodes_2); + assert_eq!(smt.root(), root); +} + +#[test] +fn rocksdb_persistence_after_insertion() { + let entries = generate_entries(1000); + + let (initial_storage, temp_dir_guard) = setup_storage(); + let db_path = temp_dir_guard.path().to_path_buf(); + + let mut smt = LargeSmt::::with_entries(initial_storage, entries).unwrap(); + let key = Word::new([ONE, ONE, ONE, ONE]); + let new_value = Word::new([Felt::new(2), Felt::new(2), Felt::new(2), Felt::new(2)]); + smt.insert(key, new_value).unwrap(); + let root = smt.root(); + + let mut inner_nodes: Vec = smt.inner_nodes().unwrap().collect(); + inner_nodes.sort_by_key(|info| info.value); + drop(smt); + + let reopened_storage = RocksDbStorage::open(RocksDbConfig::new(db_path)).unwrap(); + let smt = LargeSmt::::new(reopened_storage).unwrap(); + + let mut inner_nodes_2: Vec = smt.inner_nodes().unwrap().collect(); + inner_nodes_2.sort_by_key(|info| info.value); + + assert_eq!(inner_nodes.len(), inner_nodes_2.len()); + assert_eq!(inner_nodes, inner_nodes_2); + assert_eq!(smt.root(), root); +} + +#[test] +fn rocksdb_persistence_after_insert_batch_with_deletions() { + // Create a tree with initial entries + let entries = generate_entries(10_000); + + let (initial_storage, temp_dir_guard) = setup_storage(); + let db_path = temp_dir_guard.path().to_path_buf(); + + let mut smt = LargeSmt::::with_entries(initial_storage, entries).unwrap(); + + // Create a batch that includes both insertions and deletions + let mut batch_entries: Vec<(Word, Word)> = Vec::new(); + + // Add new entries + for i in 20_000..25_000 { + let key = 
Word::new([ONE, ONE, Felt::new(i as u64), Felt::new(i as u64 % 1000)]); + let value = Word::new([ONE, ONE, ONE, Felt::new(i as u64)]); + batch_entries.push((key, value)); + } + + // Delete some existing entries + for i in 0..1000 { + let key = Word::new([ONE, ONE, Felt::new(i as u64), Felt::new(i as u64 % 1000)]); + batch_entries.push((key, EMPTY_WORD)); + } + + smt.insert_batch(batch_entries).unwrap(); + let root = smt.root(); + + let mut inner_nodes: Vec = smt.inner_nodes().unwrap().collect(); + inner_nodes.sort_by_key(|info| info.value); + let num_leaves = smt.num_leaves().unwrap(); + let num_entries = smt.num_entries().unwrap(); + drop(smt); + + let reopened_storage = RocksDbStorage::open(RocksDbConfig::new(db_path)).unwrap(); + let smt = LargeSmt::::new(reopened_storage).unwrap(); + + let mut inner_nodes_2: Vec = smt.inner_nodes().unwrap().collect(); + inner_nodes_2.sort_by_key(|info| info.value); + let num_leaves_2 = smt.num_leaves().unwrap(); + let num_entries_2 = smt.num_entries().unwrap(); + + assert_eq!(inner_nodes.len(), inner_nodes_2.len()); + assert_eq!(inner_nodes, inner_nodes_2); + assert_eq!(num_leaves, num_leaves_2); + assert_eq!(num_entries, num_entries_2); + assert_eq!(smt.root(), root, "Tree reconstruction failed - root mismatch after deletions"); +}