diff --git a/icechunk-python/src/config.rs b/icechunk-python/src/config.rs
index fd0f1a953..fbf593828 100644
--- a/icechunk-python/src/config.rs
+++ b/icechunk-python/src/config.rs
@@ -715,6 +715,7 @@ impl From<&PyCachingConfig> for CachingConfig {
             num_transaction_changes: value.num_transaction_changes,
             num_bytes_attributes: value.num_bytes_attributes,
             num_bytes_chunks: value.num_bytes_chunks,
+            backend: Default::default(),
         }
     }
 }
diff --git a/icechunk/src/asset_manager.rs b/icechunk/src/asset_manager.rs
index f2e3101ba..7355aa983 100644
--- a/icechunk/src/asset_manager.rs
+++ b/icechunk/src/asset_manager.rs
@@ -2,7 +2,6 @@ use async_stream::try_stream;
 use bytes::Bytes;
 use chrono::{DateTime, Utc};
 use futures::{Stream, TryStreamExt};
-use quick_cache::{Weighter, sync::Cache};
 use serde::{Deserialize, Serialize};
 use std::{
     io::{BufReader, Read},
@@ -14,6 +13,7 @@ use tracing::{Span, debug, instrument, trace, warn};
 
 use crate::{
     Storage,
+    cache::{CacheBackend, EphemeralCache, NoOpCache, StorageCache},
     config::CachingConfig,
     format::{
         ChunkId, ChunkOffset, IcechunkFormatErrorKind, ManifestId, SnapshotId,
@@ -42,6 +42,7 @@ pub struct AssetManager {
     num_bytes_attributes: u64,
     num_bytes_chunks: u64,
     compression_level: u8,
+    cache_backend: CacheBackend,
     max_concurrent_requests: u16,
 
@@ -51,13 +52,13 @@ pub struct AssetManager {
     snapshot_cache_size_warned: AtomicBool,
     #[serde(skip)]
-    snapshot_cache: Cache<SnapshotId, Arc<Snapshot>, FileWeighter>,
+    snapshot_cache: Arc<dyn StorageCache<SnapshotId, Arc<Snapshot>>>,
     #[serde(skip)]
-    manifest_cache: Cache<ManifestId, Arc<Manifest>, FileWeighter>,
+    manifest_cache: Arc<dyn StorageCache<ManifestId, Arc<Manifest>>>,
     #[serde(skip)]
-    transactions_cache: Cache<SnapshotId, Arc<TransactionLog>, FileWeighter>,
+    transactions_cache: Arc<dyn StorageCache<SnapshotId, Arc<TransactionLog>>>,
     #[serde(skip)]
-    chunk_cache: Cache<(ChunkId, Range<ChunkOffset>), Bytes, FileWeighter>,
+    chunk_cache: Arc<dyn StorageCache<(ChunkId, Range<ChunkOffset>), Bytes>>,
     #[serde(skip)]
     request_semaphore: Semaphore,
@@ -75,6 +76,8 @@ struct AssetManagerSerializer {
     num_bytes_attributes: u64,
     num_bytes_chunks: u64,
     compression_level: u8,
+    #[serde(default)]
+    cache_backend: CacheBackend,
     max_concurrent_requests: u16,
 }
 
@@ -90,6 +93,7 @@ impl From<AssetManagerSerializer> for AssetManager {
             value.num_bytes_chunks,
             value.compression_level,
             value.max_concurrent_requests,
+            value.cache_backend,
         )
     }
 }
@@ -106,7 +110,24 @@ impl AssetManager {
         num_bytes_chunks: u64,
         compression_level: u8,
         max_concurrent_requests: u16,
+        cache_backend: CacheBackend,
     ) -> Self {
+        // Create caches based on the configured backend type
+        let (snapshot_cache, manifest_cache, transactions_cache, chunk_cache) = match cache_backend {
+            CacheBackend::Ephemeral => (
+                Arc::new(EphemeralCache::new(num_snapshot_nodes)) as Arc<dyn StorageCache<SnapshotId, Arc<Snapshot>>>,
+                Arc::new(EphemeralCache::new(num_chunk_refs)) as Arc<dyn StorageCache<ManifestId, Arc<Manifest>>>,
+                Arc::new(EphemeralCache::new(num_transaction_changes)) as Arc<dyn StorageCache<SnapshotId, Arc<TransactionLog>>>,
+                Arc::new(EphemeralCache::new(num_bytes_chunks)) as Arc<dyn StorageCache<(ChunkId, Range<ChunkOffset>), Bytes>>,
+            ),
+            CacheBackend::NoOp => (
+                Arc::new(NoOpCache::new()) as Arc<dyn StorageCache<SnapshotId, Arc<Snapshot>>>,
+                Arc::new(NoOpCache::new()) as Arc<dyn StorageCache<ManifestId, Arc<Manifest>>>,
+                Arc::new(NoOpCache::new()) as Arc<dyn StorageCache<SnapshotId, Arc<TransactionLog>>>,
+                Arc::new(NoOpCache::new()) as Arc<dyn StorageCache<(ChunkId, Range<ChunkOffset>), Bytes>>,
+            ),
+        };
+
         Self {
             num_snapshot_nodes,
             num_chunk_refs,
@@ -114,17 +135,14 @@ impl AssetManager {
             num_bytes_attributes,
             num_bytes_chunks,
             compression_level,
+            cache_backend,
             max_concurrent_requests,
             storage,
             storage_settings,
-            snapshot_cache: Cache::with_weighter(1, num_snapshot_nodes, FileWeighter),
-            manifest_cache: Cache::with_weighter(1, num_chunk_refs, FileWeighter),
-            transactions_cache: Cache::with_weighter(
-                0,
-                num_transaction_changes,
-                FileWeighter,
-            ),
-            chunk_cache: Cache::with_weighter(0, num_bytes_chunks, FileWeighter),
+            snapshot_cache,
+            manifest_cache,
+            transactions_cache,
+            chunk_cache,
             snapshot_cache_size_warned: AtomicBool::new(false),
             manifest_cache_size_warned: AtomicBool::new(false),
             request_semaphore: Semaphore::new(max_concurrent_requests as usize),
@@ -147,6 +165,7 @@ impl AssetManager {
             0,
             compression_level,
             max_concurrent_requests,
+            CacheBackend::NoOp,
         )
     }
 
@@ -167,6 +186,7 @@ impl AssetManager {
             config.num_bytes_chunks(),
             compression_level,
             max_concurrent_requests,
+            config.backend(),
         )
     }
 
@@ -208,22 +228,40 @@ impl AssetManager {
         manifest_id: &ManifestId,
         manifest_size: u64,
     ) -> RepositoryResult<Arc<Manifest>> {
-        match self.manifest_cache.get_value_or_guard_async(manifest_id).await {
-            Ok(manifest) => Ok(manifest),
-            Err(guard) => {
+        let storage = Arc::clone(&self.storage);
+        let settings = self.storage_settings.clone();
+        let semaphore = &self.request_semaphore;
+        let manifest_id_clone = manifest_id.clone();
+
+        let manifest = self.manifest_cache.get_or_fetch(
+            manifest_id.clone(),
+            Box::pin(async move {
                 let manifest = fetch_manifest(
-                    manifest_id,
+                    &manifest_id_clone,
                     manifest_size,
-                    self.storage.as_ref(),
-                    &self.storage_settings,
-                    &self.request_semaphore,
+                    storage.as_ref(),
+                    &settings,
+                    semaphore,
                 )
-                .await?;
-                self.warn_if_manifest_cache_small(manifest.as_ref());
-                let _fail_is_ok = guard.insert(Arc::clone(&manifest));
+                .await
+                .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
                 Ok(manifest)
+            })
+        )
+        .await
+        // Errors coming out of the cache are boxed; most of the time they wrap a RepositoryError
+        .map_err(|e| {
+            // Downcast back to RepositoryError if possible
+            match e.downcast::<RepositoryError>() {
+                Ok(repo_err) => *repo_err,
+                Err(other_err) => RepositoryError::new(RepositoryErrorKind::StorageError(
+                    crate::storage::StorageErrorKind::Other(other_err.to_string())
+                )),
             }
-        }
+        })?;
+
+        self.warn_if_manifest_cache_small(manifest.as_ref());
+        Ok(manifest)
     }
 
     fn warn_if_manifest_cache_small(&self, manifest: &Manifest) {
@@ -288,21 +326,37 @@ impl AssetManager {
         &self,
         snapshot_id: &SnapshotId,
     ) -> RepositoryResult<Arc<Snapshot>> {
-        match self.snapshot_cache.get_value_or_guard_async(snapshot_id).await {
-            Ok(snapshot) => Ok(snapshot),
-            Err(guard) => {
+        let storage = Arc::clone(&self.storage);
+        let settings = self.storage_settings.clone();
+        let semaphore = &self.request_semaphore;
+        let snapshot_id_clone = snapshot_id.clone();
+
+        let snapshot = self.snapshot_cache.get_or_fetch(
+            snapshot_id.clone(),
+            Box::pin(async move {
                 let snapshot = fetch_snapshot(
-                    snapshot_id,
-                    self.storage.as_ref(),
-                    &self.storage_settings,
-                    &self.request_semaphore,
+                    &snapshot_id_clone,
+                    storage.as_ref(),
+                    &settings,
+                    semaphore,
                 )
-                .await?;
-                self.warn_if_snapshot_cache_small(snapshot.as_ref());
-                let _fail_is_ok = guard.insert(Arc::clone(&snapshot));
+                .await
+                .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
                 Ok(snapshot)
+            })
+        )
+        .await
+        .map_err(|e| {
+            match e.downcast::<RepositoryError>() {
+                Ok(repo_err) => *repo_err,
+                Err(other_err) => RepositoryError::new(RepositoryErrorKind::StorageError(
+                    crate::storage::StorageErrorKind::Other(other_err.to_string())
+                )),
             }
-        }
+        })?;
+
+        self.warn_if_snapshot_cache_small(snapshot.as_ref());
+        Ok(snapshot)
     }
 
     #[instrument(skip(self, log))]
@@ -330,20 +384,36 @@ impl AssetManager {
         &self,
         transaction_id: &SnapshotId,
     ) -> RepositoryResult<Arc<TransactionLog>> {
-        match self.transactions_cache.get_value_or_guard_async(transaction_id).await {
-            Ok(transaction) => Ok(transaction),
-            Err(guard) => {
+        let storage = Arc::clone(&self.storage);
+        let settings = self.storage_settings.clone();
+        let semaphore = &self.request_semaphore;
+        let transaction_id_clone = transaction_id.clone();
+
+        let transaction = self.transactions_cache.get_or_fetch(
+            transaction_id.clone(),
+            Box::pin(async move {
                 let transaction = fetch_transaction_log(
-                    transaction_id,
-                    self.storage.as_ref(),
-                    &self.storage_settings,
-                    &self.request_semaphore,
+                    &transaction_id_clone,
+                    storage.as_ref(),
+                    &settings,
+                    semaphore,
                 )
-                .await?;
-                let _fail_is_ok = guard.insert(Arc::clone(&transaction));
+                .await
+                .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
                 Ok(transaction)
+            })
+        )
+        .await
+        .map_err(|e| {
+            match e.downcast::<RepositoryError>() {
+                Ok(repo_err) => *repo_err,
+                Err(other_err) => RepositoryError::new(RepositoryErrorKind::StorageError(
+                    crate::storage::StorageErrorKind::Other(other_err.to_string())
+                )),
             }
-        }
+        })?;
+
+        Ok(transaction)
     }
 
     #[instrument(skip(self, bytes))]
@@ -365,20 +435,37 @@ impl AssetManager {
         range: &Range<ChunkOffset>,
     ) -> RepositoryResult<Bytes> {
         let key = (chunk_id.clone(), range.clone());
-        match self.chunk_cache.get_value_or_guard_async(&key).await {
-            Ok(chunk) => Ok(chunk),
-            Err(guard) => {
-                trace!(%chunk_id, ?range, "Downloading chunk");
-                let permit = self.request_semaphore.acquire().await?;
-                let chunk = self
-                    .storage
-                    .fetch_chunk(&self.storage_settings, chunk_id, range)
-                    .await?;
+        let storage = Arc::clone(&self.storage);
+        let settings = self.storage_settings.clone();
+        let semaphore = &self.request_semaphore;
+        let chunk_id_clone = chunk_id.clone();
+        let range_clone = range.clone();
+
+        let chunk = self.chunk_cache.get_or_fetch(
+            key,
+            Box::pin(async move {
+                trace!(%chunk_id_clone, ?range_clone, "Downloading chunk");
+                let permit = semaphore.acquire().await
+                    .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
+                let chunk = storage
+                    .fetch_chunk(&settings, &chunk_id_clone, &range_clone)
+                    .await
+                    .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
                 drop(permit);
-                let _fail_is_ok = guard.insert(chunk.clone());
                 Ok(chunk)
+            })
+        )
+        .await
+        .map_err(|e| {
+            match e.downcast::<RepositoryError>() {
+                Ok(repo_err) => *repo_err,
+                Err(other_err) => RepositoryError::new(RepositoryErrorKind::StorageError(
+                    crate::storage::StorageErrorKind::Other(other_err.to_string())
+                )),
             }
-        }
+        })?;
+
+        Ok(chunk)
     }
 
     /// Returns the sequence of parents of the current session, in order of latest first.
@@ -773,32 +860,6 @@ async fn fetch_transaction_log(
         .map(Arc::new)
 }
 
-#[derive(Debug, Clone)]
-struct FileWeighter;
-
-impl Weighter<ManifestId, Arc<Manifest>> for FileWeighter {
-    fn weight(&self, _: &ManifestId, val: &Arc<Manifest>) -> u64 {
-        val.len() as u64
-    }
-}
-
-impl Weighter<SnapshotId, Arc<Snapshot>> for FileWeighter {
-    fn weight(&self, _: &SnapshotId, val: &Arc<Snapshot>) -> u64 {
-        val.len() as u64
-    }
-}
-
-impl Weighter<(ChunkId, Range<ChunkOffset>), Bytes> for FileWeighter {
-    fn weight(&self, _: &(ChunkId, Range<ChunkOffset>), val: &Bytes) -> u64 {
-        val.len() as u64
-    }
-}
-
-impl Weighter<SnapshotId, Arc<TransactionLog>> for FileWeighter {
-    fn weight(&self, _: &SnapshotId, val: &Arc<TransactionLog>) -> u64 {
-        val.len() as u64
-    }
-}
 
 #[cfg(test)]
 #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
 mod test {
@@ -937,6 +998,7 @@ mod test {
                 num_transaction_changes: Some(0),
                 num_bytes_attributes: Some(0),
                 num_bytes_chunks: Some(0),
+                backend: crate::cache::CacheBackend::Ephemeral,
             },
             1,
             100,
diff --git a/icechunk/src/cache/ephemeral.rs b/icechunk/src/cache/ephemeral.rs
new file mode 100644
index 000000000..4a59bd220
--- /dev/null
+++ b/icechunk/src/cache/ephemeral.rs
@@ -0,0 +1,159 @@
+//! Ephemeral (in-memory) cache backend implementation.
+//!
+//! This module provides an in-memory LRU cache with weight-based eviction using
+//! the `quick_cache` library. This is the default cache backend for Icechunk.
+
+use bytes::Bytes;
+use quick_cache::{Weighter, sync::Cache};
+use std::future::Future;
+use std::hash::Hash;
+use std::pin::Pin;
+use std::sync::Arc;
+
+use super::StorageCache;
+use crate::format::{manifest::Manifest, snapshot::Snapshot, transaction_log::TransactionLog};
+
+/// A trait for types that can provide their byte size for cache weighting.
+///
+/// This trait is used to determine how much "weight" each cached item has,
+/// which determines how many items can fit in the cache before eviction occurs.
+pub trait Weighable {
+    /// Returns the weight of this item in bytes.
+    ///
+    /// For most types, this is the actual byte size of the data structure.
+    fn weight(&self) -> u64;
+}
+
+// Implement Weighable for the types used in AssetManager
+impl Weighable for Arc<Manifest> {
+    fn weight(&self) -> u64 {
+        self.len() as u64
+    }
+}
+
+impl Weighable for Arc<Snapshot> {
+    fn weight(&self) -> u64 {
+        self.len() as u64
+    }
+}
+
+impl Weighable for Arc<TransactionLog> {
+    fn weight(&self) -> u64 {
+        self.len() as u64
+    }
+}
+
+impl Weighable for Bytes {
+    fn weight(&self) -> u64 {
+        self.len() as u64
+    }
+}
+
+/// An in-memory LRU cache with weight-based eviction.
+///
+/// This cache uses the `quick_cache` library to provide fast, thread-safe
+/// access with automatic eviction based on item weights. Items are evicted
+/// using a Least Recently Used (LRU) policy when the total weight exceeds
+/// the configured capacity.
+///
+/// # Features
+///
+/// - **Weight-based eviction**: Items are weighted by their byte size, allowing
+///   precise control over memory usage.
+/// - **Concurrent request deduplication**: Multiple concurrent requests for the
+///   same uncached key will only trigger one fetch operation.
+/// - **Thread-safe**: Safe to use across multiple async tasks.
+///
+/// # Type Parameters
+///
+/// - `K`: The key type (must be `Hash + Eq + Clone`)
+/// - `V`: The value type (must implement [`Weighable`])
+#[derive(Debug)]
+pub struct EphemeralCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Weighable + Clone + Send + Sync + 'static,
+{
+    cache: Cache<K, V, GenericWeighter>,
+}
+
+impl<K, V> EphemeralCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Weighable + Clone + Send + Sync + 'static,
+{
+    /// Creates a new ephemeral cache with the specified capacity.
+    ///
+    /// # Parameters
+    ///
+    /// - `capacity`: The maximum total weight (in bytes) of all cached items.
+    ///   When this limit is exceeded, the least recently used items are evicted.
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// use icechunk::cache::EphemeralCache;
+    ///
+    /// // Create a cache that can hold up to 1GB of data
+    /// let cache = EphemeralCache::new(1_000_000_000);
+    /// ```
+    pub fn new(capacity: u64) -> Self {
+        Self {
+            cache: Cache::with_weighter(
+                capacity.try_into().unwrap_or(usize::MAX),
+                capacity,
+                GenericWeighter,
+            ),
+        }
+    }
+}
+
+impl<K, V> StorageCache<K, V> for EphemeralCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + std::fmt::Debug + 'static,
+    V: Weighable + Clone + Send + Sync + std::fmt::Debug + 'static,
+{
+    fn get_or_fetch<'a>(
+        &'a self,
+        key: K,
+        fetcher: Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>,
+    ) -> Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>> {
+        Box::pin(async move {
+            match self.cache.get_value_or_guard_async(&key).await {
+                Ok(value) => Ok(value),
+                Err(guard) => {
+                    let value = fetcher.await?;
+                    let _ = guard.insert(value.clone());
+                    Ok(value)
+                }
+            }
+        })
+    }
+
+    fn insert(&self, key: K, value: V) {
+        self.cache.insert(key, value);
+    }
+
+    fn remove(&self, key: &K) {
+        self.cache.remove(key);
+    }
+
+    fn clear(&self) {
+        self.cache.clear();
+    }
+}
+
+/// A generic weighter that delegates to the Weighable trait.
+#[derive(Debug, Clone)]
+struct GenericWeighter;
+
+impl<K, V> Weighter<K, V> for GenericWeighter
+where
+    K: Hash + Eq,
+    V: Weighable,
+{
+    fn weight(&self, _key: &K, value: &V) -> u64 {
+        value.weight()
+    }
+}
diff --git a/icechunk/src/cache/mod.rs b/icechunk/src/cache/mod.rs
new file mode 100644
index 000000000..08239961e
--- /dev/null
+++ b/icechunk/src/cache/mod.rs
@@ -0,0 +1,138 @@
+//! Pluggable cache backend infrastructure for Icechunk.
+//!
+//! This module provides a trait-based architecture for cache backends that allows
+//! different caching strategies to be used interchangeably. The cache is used by
+//! the [`AssetManager`](crate::asset_manager::AssetManager) to cache manifests,
+//! snapshots, transaction logs, and chunks.
+//!
+//! # Available Backends
+//!
+//! - **Ephemeral** ([`EphemeralCache`]): In-memory LRU cache with weight-based eviction.
+//!   This is the default backend and provides fast access with configurable memory limits.
+//!   Concurrent requests for the same uncached item are automatically deduplicated.
+//!
+//! - **NoOp** ([`NoOpCache`]): A pass-through cache that performs no caching. Useful for
+//!   debugging or when caching is not desired.
+//!
+//! # Examples
+//!
+//! The cache backend is configured via [`CachingConfig`](crate::config::CachingConfig):
+//!
+//! ```rust,no_run
+//! use icechunk::{config::CachingConfig, cache::CacheBackend};
+//!
+//! let config = CachingConfig {
+//!     num_snapshot_nodes: Some(500_000),
+//!     num_chunk_refs: Some(15_000_000),
+//!     backend: CacheBackend::Ephemeral,
+//!     ..Default::default()
+//! };
+//! ```
+//!
+//! # Implementing Custom Backends
+//!
+//! To implement a custom cache backend (e.g., Redis, filesystem), implement the
+//! [`StorageCache`] trait and add a new variant to [`CacheBackend`].
+
+use serde::{Deserialize, Serialize};
+use std::future::Future;
+use std::hash::Hash;
+use std::pin::Pin;
+
+pub mod ephemeral;
+pub mod noop;
+
+pub use ephemeral::EphemeralCache;
+pub use noop::NoOpCache;
+
+/// A trait for pluggable cache backends that support async operations,
+/// weight-based eviction, and concurrent request deduplication.
+///
+/// This trait is object-safe (uses boxed futures) to allow dynamic dispatch
+/// through `Arc<dyn StorageCache<K, V>>`.
+///
+/// # Type Parameters
+///
+/// - `K`: The cache key type (must be hashable and cloneable)
+/// - `V`: The cached value type (must be cloneable)
+///
+/// # Thread Safety
+///
+/// Implementations must be `Send + Sync` to allow use across async tasks.
+pub trait StorageCache<K, V>: Send + Sync + std::fmt::Debug
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Clone + Send + Sync + 'static,
+{
+    /// Gets a value from the cache, or fetches it using the provided future if not present.
+    ///
+    /// Concurrent requests for the same key should be deduplicated - only one fetch
+    /// operation should occur, with all waiters receiving the same result.
+    ///
+    /// # Parameters
+    ///
+    /// - `key`: The cache key to look up
+    /// - `fetcher`: An async future that fetches the value if it is not in the cache
+    ///
+    /// # Returns
+    ///
+    /// The cached or fetched value, or an error if the fetch operation fails.
+    fn get_or_fetch<'a>(
+        &'a self,
+        key: K,
+        fetcher: Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>,
+    ) -> Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>;
+
+    /// Inserts a value into the cache with the given key.
+    ///
+    /// If the key already exists, the value is updated. If the cache is full,
+    /// the implementation determines which entry to evict.
+    fn insert(&self, key: K, value: V);
+
+    /// Removes a value from the cache.
+    ///
+    /// If the key doesn't exist, this is a no-op.
+    fn remove(&self, key: &K);
+
+    /// Clears all entries from the cache.
+    fn clear(&self);
+}
+
+/// Enum representing different cache backend implementations.
+///
+/// This enum is used in [`CachingConfig`](crate::config::CachingConfig) to select
+/// which cache backend to use for the repository.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum CacheBackend {
+    /// In-memory LRU cache with weight-based eviction (default).
+    ///
+    /// Provides fast access with configurable memory limits. Cache entries are
+    /// evicted based on their size (weight) using an LRU policy. This backend
+    /// automatically deduplicates concurrent requests for the same uncached item.
+    ///
+    /// This is the default backend and provides the best performance for most use cases.
+    Ephemeral,
+
+    /// No-op cache that performs no caching.
+    ///
+    /// All requests are passed directly to the storage backend without caching.
+    /// Useful for debugging, testing, or when caching overhead is not desired.
+    NoOp,
+}
+
+impl Default for CacheBackend {
+    fn default() -> Self {
+        CacheBackend::Ephemeral
+    }
+}
+
+impl CacheBackend {
+    /// Returns a human-readable name for the cache backend.
+    pub fn name(&self) -> &'static str {
+        match self {
+            CacheBackend::Ephemeral => "ephemeral",
+            CacheBackend::NoOp => "noop",
+        }
+    }
+}
diff --git a/icechunk/src/cache/noop.rs b/icechunk/src/cache/noop.rs
new file mode 100644
index 000000000..3062525b3
--- /dev/null
+++ b/icechunk/src/cache/noop.rs
@@ -0,0 +1,83 @@
+//! No-op cache backend implementation.
+//!
+//! This module provides a cache implementation that performs no caching at all.
+//! All requests are passed directly through to the underlying storage backend.
+
+use std::future::Future;
+use std::hash::Hash;
+use std::marker::PhantomData;
+use std::pin::Pin;
+
+use super::StorageCache;
+
+/// A no-op cache that performs no caching.
+///
+/// This cache implementation is a pass-through that never stores any values.
+/// All requests are forwarded directly to the fetcher future, making it
+/// equivalent to having no cache at all.
+///
+/// # Use Cases
+///
+/// - **Debugging**: Disable caching to test storage backend performance
+/// - **Testing**: Ensure tests exercise the full storage path
+/// - **Low-memory environments**: Avoid cache overhead when memory is constrained
+///
+/// # Type Parameters
+///
+/// - `K`: The key type (not actually used, but required for trait compatibility)
+/// - `V`: The value type (not actually used, but required for trait compatibility)
+#[derive(Debug)]
+pub struct NoOpCache<K, V> {
+    _phantom: PhantomData<(K, V)>,
+}
+
+impl<K, V> NoOpCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Clone + Send + Sync + 'static,
+{
+    /// Creates a new no-op cache.
+    pub fn new() -> Self {
+        Self {
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<K, V> Default for NoOpCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Clone + Send + Sync + 'static,
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<K, V> StorageCache<K, V> for NoOpCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + std::fmt::Debug + 'static,
+    V: Clone + Send + Sync + std::fmt::Debug + 'static,
+{
+    fn get_or_fetch<'a>(
+        &'a self,
+        _key: K,
+        fetcher: Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>,
+    ) -> Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>> {
+        // Always fetch, never cache
+        fetcher
+    }
+
+    fn insert(&self, _key: K, _value: V) {
+        // No-op
+    }
+
+    fn remove(&self, _key: &K) {
+        // No-op
+    }
+
+    fn clear(&self) {
+        // No-op
+    }
+}
diff --git a/icechunk/src/config.rs b/icechunk/src/config.rs
index 659e7ecc1..3f5775dd8 100644
--- a/icechunk/src/config.rs
+++ b/icechunk/src/config.rs
@@ -91,13 +91,73 @@ impl CompressionConfig {
     }
 }
 
+/// Configuration for the repository's caching behavior.
+///
+/// Icechunk caches various metadata and data to improve performance. This struct
+/// controls the size limits and backend type for the cache.
+///
+/// # Cache Types
+///
+/// Icechunk maintains separate caches for different data types:
+///
+/// - **Snapshot nodes**: Cached based on number of nodes
+/// - **Chunk references**: Cached based on number of references in manifests
+/// - **Transaction changes**: Cached based on number of changes
+/// - **Attributes**: Cached based on total bytes
+/// - **Chunks**: Cached based on total bytes
+///
+/// # Cache Backend
+///
+/// The `backend` field selects which cache implementation to use:
+///
+/// - [`CacheBackend::Ephemeral`](crate::cache::CacheBackend::Ephemeral) (default):
+///   In-memory LRU cache with weight-based eviction
+/// - [`CacheBackend::NoOp`](crate::cache::CacheBackend::NoOp):
+///   Disables caching entirely
+///
+/// # Examples
+///
+/// ```rust
+/// use icechunk::config::CachingConfig;
+/// use icechunk::cache::CacheBackend;
+///
+/// // Create a config with default values
+/// let config = CachingConfig::default();
+///
+/// // Create a custom config
+/// let config = CachingConfig {
+///     num_snapshot_nodes: Some(1_000_000),
+///     num_chunk_refs: Some(20_000_000),
+///     backend: CacheBackend::Ephemeral,
+///     ..Default::default()
+/// };
+/// ```
 #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Default)]
 pub struct CachingConfig {
+    /// Maximum number of snapshot nodes to cache.
+    /// Default: 500,000
     pub num_snapshot_nodes: Option<u64>,
+
+    /// Maximum number of chunk references to cache from manifests.
+    /// Default: 15,000,000
     pub num_chunk_refs: Option<u64>,
+
+    /// Maximum number of transaction changes to cache.
+    /// Default: 0 (disabled)
     pub num_transaction_changes: Option<u64>,
+
+    /// Maximum bytes of attribute data to cache.
+    /// Default: 0 (disabled)
     pub num_bytes_attributes: Option<u64>,
+
+    /// Maximum bytes of chunk data to cache.
+    /// Default: 0 (disabled)
     pub num_bytes_chunks: Option<u64>,
+
+    /// The cache backend implementation to use.
+    /// Default: [`CacheBackend::Ephemeral`](crate::cache::CacheBackend::Ephemeral)
+    #[serde(default)]
+    pub backend: crate::cache::CacheBackend,
 }
 
 impl CachingConfig {
@@ -117,6 +177,10 @@ impl CachingConfig {
         self.num_bytes_chunks.unwrap_or(0)
     }
 
+    pub fn backend(&self) -> crate::cache::CacheBackend {
+        self.backend
+    }
+
     pub fn merge(&self, other: Self) -> Self {
         Self {
             num_snapshot_nodes: other.num_snapshot_nodes.or(self.num_snapshot_nodes),
@@ -128,6 +192,7 @@ impl CachingConfig {
                 .num_bytes_attributes
                 .or(self.num_bytes_attributes),
             num_bytes_chunks: other.num_bytes_chunks.or(self.num_bytes_chunks),
+            backend: other.backend,
         }
     }
 }
diff --git a/icechunk/src/lib.rs b/icechunk/src/lib.rs
index 31aef15f6..f12d28bdd 100644
--- a/icechunk/src/lib.rs
+++ b/icechunk/src/lib.rs
@@ -18,6 +18,7 @@
 //! - The datastructures are represented by concrete types in the [`mod@format`] modules.
 //!   These datastructures use Arrow RecordBatches for representation.
 pub mod asset_manager;
+pub mod cache;
 pub mod change_set;
 pub mod cli;
 pub mod config;
diff --git a/icechunk/src/strategies.rs b/icechunk/src/strategies.rs
index aca635f27..ed658ecd0 100644
--- a/icechunk/src/strategies.rs
+++ b/icechunk/src/strategies.rs
@@ -158,6 +158,7 @@ prop_compose! {
             num_transaction_changes,
             num_bytes_attributes,
             num_bytes_chunks,
+            backend: crate::cache::CacheBackend::Ephemeral,
         }
     }
 }
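
For readers skimming the `AssetManager` changes, the calling convention is: hand `get_or_fetch` a pinned, boxed future and box any failure into `Box<dyn std::error::Error + Send + Sync>`; the caller then downcasts the error back. Below is a minimal usage sketch against the new `EphemeralCache`, not part of the patch; it assumes a Tokio runtime with the `macros` and `rt` features and relies on the `Bytes` weighting shipped in `cache/ephemeral.rs`.

```rust
use std::sync::Arc;

use bytes::Bytes;
use icechunk::cache::{EphemeralCache, StorageCache};

#[tokio::main]
async fn main() {
    // Weight budget of ~1 MiB; Bytes values are weighted by their length.
    let cache: Arc<dyn StorageCache<String, Bytes>> =
        Arc::new(EphemeralCache::new(1024 * 1024));

    // The fetcher future only runs on a cache miss; concurrent misses for the
    // same key are deduplicated by the ephemeral backend.
    let value = cache
        .get_or_fetch(
            "chunk-0".to_string(),
            Box::pin(async {
                // Stand-in for a storage read. Real callers box their error, e.g.
                // `.map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)`.
                Ok::<_, Box<dyn std::error::Error + Send + Sync>>(Bytes::from_static(b"hello"))
            }),
        )
        .await
        .expect("fetch failed");

    assert_eq!(value, Bytes::from_static(b"hello"));
}
```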
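The `cache/mod.rs` docs point at custom backends (Redis, filesystem, ...) but the patch only ships `Ephemeral` and `NoOp`. The sketch below is illustrative only: `MapCache`, `FetchResult`, and `BoxedFetch` are hypothetical names, the map grows without bound, it does not deduplicate concurrent fetches the way `EphemeralCache` does, and it assumes the boxed-future `StorageCache` signature declared in `cache/mod.rs`. A real backend would also need its own `CacheBackend` variant and wiring in `AssetManager::new`.

```rust
use std::collections::HashMap;
use std::future::Future;
use std::hash::Hash;
use std::pin::Pin;
use std::sync::Mutex;

use icechunk::cache::StorageCache;

// Aliases only to keep the signatures readable; they resolve to the trait's own types.
type FetchResult<V> = Result<V, Box<dyn std::error::Error + Send + Sync>>;
type BoxedFetch<'a, V> = Pin<Box<dyn Future<Output = FetchResult<V>> + Send + 'a>>;

/// A hypothetical, unbounded map-backed cache, for illustration only.
#[derive(Debug)]
pub struct MapCache<K, V> {
    entries: Mutex<HashMap<K, V>>,
}

impl<K, V> MapCache<K, V> {
    /// Creates an empty map-backed cache.
    pub fn new() -> Self {
        Self { entries: Mutex::new(HashMap::new()) }
    }
}

impl<K, V> StorageCache<K, V> for MapCache<K, V>
where
    K: Hash + Eq + Clone + Send + Sync + std::fmt::Debug + 'static,
    V: Clone + Send + Sync + std::fmt::Debug + 'static,
{
    fn get_or_fetch<'a>(&'a self, key: K, fetcher: BoxedFetch<'a, V>) -> BoxedFetch<'a, V> {
        Box::pin(async move {
            // Serve from the map when possible; the lock is released before awaiting.
            let cached = self.entries.lock().unwrap().get(&key).cloned();
            if let Some(hit) = cached {
                return Ok(hit);
            }
            // Otherwise run the fetcher and remember the value (no eviction, no dedup).
            let value = fetcher.await?;
            self.entries.lock().unwrap().insert(key, value.clone());
            Ok(value)
        })
    }

    fn insert(&self, key: K, value: V) {
        self.entries.lock().unwrap().insert(key, value);
    }

    fn remove(&self, key: &K) {
        self.entries.lock().unwrap().remove(key);
    }

    fn clear(&self) {
        self.entries.lock().unwrap().clear();
    }
}
```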