diff --git a/icechunk-python/src/config.rs b/icechunk-python/src/config.rs
index fd0f1a953..fbf593828 100644
--- a/icechunk-python/src/config.rs
+++ b/icechunk-python/src/config.rs
@@ -715,6 +715,7 @@ impl From<&PyCachingConfig> for CachingConfig {
             num_transaction_changes: value.num_transaction_changes,
             num_bytes_attributes: value.num_bytes_attributes,
             num_bytes_chunks: value.num_bytes_chunks,
+            backend: Default::default(),
         }
     }
 }
diff --git a/icechunk/src/asset_manager.rs b/icechunk/src/asset_manager.rs
index f2e3101ba..7355aa983 100644
--- a/icechunk/src/asset_manager.rs
+++ b/icechunk/src/asset_manager.rs
@@ -2,7 +2,6 @@ use async_stream::try_stream;
 use bytes::Bytes;
 use chrono::{DateTime, Utc};
 use futures::{Stream, TryStreamExt};
-use quick_cache::{Weighter, sync::Cache};
 use serde::{Deserialize, Serialize};
 use std::{
     io::{BufReader, Read},
@@ -14,6 +13,7 @@ use tracing::{Span, debug, instrument, trace, warn};
 
 use crate::{
     Storage,
+    cache::{CacheBackend, EphemeralCache, NoOpCache, StorageCache},
     config::CachingConfig,
     format::{
         ChunkId, ChunkOffset, IcechunkFormatErrorKind, ManifestId, SnapshotId,
@@ -42,6 +42,7 @@ pub struct AssetManager {
     num_bytes_attributes: u64,
     num_bytes_chunks: u64,
     compression_level: u8,
+    cache_backend: CacheBackend,
     max_concurrent_requests: u16,
 
@@ -51,13 +52,13 @@ pub struct AssetManager {
     snapshot_cache_size_warned: AtomicBool,
     #[serde(skip)]
-    snapshot_cache: Cache<SnapshotId, Arc<Snapshot>, FileWeighter>,
+    snapshot_cache: Arc<dyn StorageCache<SnapshotId, Arc<Snapshot>>>,
     #[serde(skip)]
-    manifest_cache: Cache<ManifestId, Arc<Manifest>, FileWeighter>,
+    manifest_cache: Arc<dyn StorageCache<ManifestId, Arc<Manifest>>>,
     #[serde(skip)]
-    transactions_cache: Cache<SnapshotId, Arc<TransactionLog>, FileWeighter>,
+    transactions_cache: Arc<dyn StorageCache<SnapshotId, Arc<TransactionLog>>>,
     #[serde(skip)]
-    chunk_cache: Cache<(ChunkId, Range<ChunkOffset>), Bytes, FileWeighter>,
+    chunk_cache: Arc<dyn StorageCache<(ChunkId, Range<ChunkOffset>), Bytes>>,
     #[serde(skip)]
     request_semaphore: Semaphore,
@@ -75,6 +76,8 @@ struct AssetManagerSerializer {
     num_bytes_attributes: u64,
     num_bytes_chunks: u64,
     compression_level: u8,
+    #[serde(default)]
+    cache_backend: CacheBackend,
     max_concurrent_requests: u16,
 }
 
@@ -90,6 +93,7 @@ impl From<AssetManagerSerializer> for AssetManager {
             value.num_bytes_chunks,
             value.compression_level,
             value.max_concurrent_requests,
+            value.cache_backend,
         )
     }
 }
@@ -106,7 +110,24 @@ impl AssetManager {
         num_bytes_chunks: u64,
         compression_level: u8,
         max_concurrent_requests: u16,
+        cache_backend: CacheBackend,
     ) -> Self {
+        // Create caches based on the configured backend type
+        let (snapshot_cache, manifest_cache, transactions_cache, chunk_cache) = match cache_backend {
+            CacheBackend::Ephemeral => (
+                Arc::new(EphemeralCache::new(num_snapshot_nodes)) as Arc<dyn StorageCache<SnapshotId, Arc<Snapshot>>>,
+                Arc::new(EphemeralCache::new(num_chunk_refs)) as Arc<dyn StorageCache<ManifestId, Arc<Manifest>>>,
+                Arc::new(EphemeralCache::new(num_transaction_changes)) as Arc<dyn StorageCache<SnapshotId, Arc<TransactionLog>>>,
+                Arc::new(EphemeralCache::new(num_bytes_chunks)) as Arc<dyn StorageCache<(ChunkId, Range<ChunkOffset>), Bytes>>,
+            ),
+            CacheBackend::NoOp => (
+                Arc::new(NoOpCache::new()) as Arc<dyn StorageCache<SnapshotId, Arc<Snapshot>>>,
+                Arc::new(NoOpCache::new()) as Arc<dyn StorageCache<ManifestId, Arc<Manifest>>>,
+                Arc::new(NoOpCache::new()) as Arc<dyn StorageCache<SnapshotId, Arc<TransactionLog>>>,
+                Arc::new(NoOpCache::new()) as Arc<dyn StorageCache<(ChunkId, Range<ChunkOffset>), Bytes>>,
+            ),
+        };
+
         Self {
             num_snapshot_nodes,
             num_chunk_refs,
@@ -114,17 +135,14 @@ impl AssetManager {
             num_bytes_attributes,
             num_bytes_chunks,
             compression_level,
+            cache_backend,
             max_concurrent_requests,
             storage,
             storage_settings,
-            snapshot_cache: Cache::with_weighter(1, num_snapshot_nodes, FileWeighter),
-            manifest_cache: Cache::with_weighter(1, num_chunk_refs, FileWeighter),
-            transactions_cache: Cache::with_weighter(
-                0,
-                num_transaction_changes,
-                FileWeighter,
-            ),
-            chunk_cache: Cache::with_weighter(0, num_bytes_chunks, FileWeighter),
+            snapshot_cache,
+            manifest_cache,
+            transactions_cache,
+            chunk_cache,
             snapshot_cache_size_warned: AtomicBool::new(false),
             manifest_cache_size_warned: AtomicBool::new(false),
             request_semaphore: Semaphore::new(max_concurrent_requests as usize),
@@ -147,6 +165,7 @@ impl AssetManager {
             0,
             compression_level,
             max_concurrent_requests,
+            CacheBackend::NoOp,
         )
     }
 
@@ -167,6 +186,7 @@ impl AssetManager {
             config.num_bytes_chunks(),
             compression_level,
             max_concurrent_requests,
+            config.backend(),
         )
     }
 
@@ -208,22 +228,40 @@ impl AssetManager {
         manifest_id: &ManifestId,
         manifest_size: u64,
     ) -> RepositoryResult<Arc<Manifest>> {
-        match self.manifest_cache.get_value_or_guard_async(manifest_id).await {
-            Ok(manifest) => Ok(manifest),
-            Err(guard) => {
+        let storage = Arc::clone(&self.storage);
+        let settings = self.storage_settings.clone();
+        let semaphore = &self.request_semaphore;
+        let manifest_id_clone = manifest_id.clone();
+
+        let manifest = self.manifest_cache.get_or_fetch(
+            manifest_id.clone(),
+            Box::pin(async move {
                 let manifest = fetch_manifest(
-                    manifest_id,
+                    &manifest_id_clone,
                     manifest_size,
-                    self.storage.as_ref(),
-                    &self.storage_settings,
-                    &self.request_semaphore,
+                    storage.as_ref(),
+                    &settings,
+                    semaphore,
                 )
-                .await?;
-                self.warn_if_manifest_cache_small(manifest.as_ref());
-                let _fail_is_ok = guard.insert(Arc::clone(&manifest));
+                .await
+                .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
                 Ok(manifest)
+            })
+        )
+        .await
+        // Errors coming out of the cache are boxed; most of the time they wrap a RepositoryError
+        .map_err(|e| {
+            // Downcast back to RepositoryError if possible
+            match e.downcast::<RepositoryError>() {
+                Ok(repo_err) => *repo_err,
+                Err(other_err) => RepositoryError::new(RepositoryErrorKind::StorageError(
+                    crate::storage::StorageErrorKind::Other(other_err.to_string())
+                )),
             }
-        }
+        })?;
+
+        self.warn_if_manifest_cache_small(manifest.as_ref());
+        Ok(manifest)
     }
 
     fn warn_if_manifest_cache_small(&self, manifest: &Manifest) {
@@ -288,21 +326,37 @@ impl AssetManager {
         &self,
         snapshot_id: &SnapshotId,
     ) -> RepositoryResult<Arc<Snapshot>> {
-        match self.snapshot_cache.get_value_or_guard_async(snapshot_id).await {
-            Ok(snapshot) => Ok(snapshot),
-            Err(guard) => {
+        let storage = Arc::clone(&self.storage);
+        let settings = self.storage_settings.clone();
+        let semaphore = &self.request_semaphore;
+        let snapshot_id_clone = snapshot_id.clone();
+
+        let snapshot = self.snapshot_cache.get_or_fetch(
+            snapshot_id.clone(),
+            Box::pin(async move {
                 let snapshot = fetch_snapshot(
-                    snapshot_id,
-                    self.storage.as_ref(),
-                    &self.storage_settings,
-                    &self.request_semaphore,
+                    &snapshot_id_clone,
+                    storage.as_ref(),
+                    &settings,
+                    semaphore,
                 )
-                .await?;
-                self.warn_if_snapshot_cache_small(snapshot.as_ref());
-                let _fail_is_ok = guard.insert(Arc::clone(&snapshot));
+                .await
+                .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
                 Ok(snapshot)
+            })
+        )
+        .await
+        .map_err(|e| {
+            match e.downcast::<RepositoryError>() {
+                Ok(repo_err) => *repo_err,
+                Err(other_err) => RepositoryError::new(RepositoryErrorKind::StorageError(
+                    crate::storage::StorageErrorKind::Other(other_err.to_string())
+                )),
             }
-        }
+        })?;
+
+        self.warn_if_snapshot_cache_small(snapshot.as_ref());
+        Ok(snapshot)
     }
 
     #[instrument(skip(self, log))]
@@ -330,20 +384,36 @@ impl AssetManager {
         &self,
         transaction_id: &SnapshotId,
     ) -> RepositoryResult<Arc<TransactionLog>> {
-        match self.transactions_cache.get_value_or_guard_async(transaction_id).await {
-            Ok(transaction) => Ok(transaction),
-            Err(guard) => {
+        let storage = Arc::clone(&self.storage);
+        let settings = self.storage_settings.clone();
+        let semaphore = &self.request_semaphore;
+        let transaction_id_clone = transaction_id.clone();
+
+        let transaction = self.transactions_cache.get_or_fetch(
+            transaction_id.clone(),
+            Box::pin(async move {
                 let transaction = fetch_transaction_log(
-                    transaction_id,
-                    self.storage.as_ref(),
-                    &self.storage_settings,
-                    &self.request_semaphore,
+                    &transaction_id_clone,
+                    storage.as_ref(),
+                    &settings,
+                    semaphore,
                 )
-                .await?;
-                let _fail_is_ok = guard.insert(Arc::clone(&transaction));
+                .await
+                .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
                 Ok(transaction)
+            })
+        )
+        .await
+        .map_err(|e| {
+            match e.downcast::<RepositoryError>() {
+                Ok(repo_err) => *repo_err,
+                Err(other_err) => RepositoryError::new(RepositoryErrorKind::StorageError(
+                    crate::storage::StorageErrorKind::Other(other_err.to_string())
+                )),
             }
-        }
+        })?;
+
+        Ok(transaction)
     }
 
     #[instrument(skip(self, bytes))]
@@ -365,20 +435,37 @@ impl AssetManager {
         range: &Range<ChunkOffset>,
     ) -> RepositoryResult<Bytes> {
         let key = (chunk_id.clone(), range.clone());
-        match self.chunk_cache.get_value_or_guard_async(&key).await {
-            Ok(chunk) => Ok(chunk),
-            Err(guard) => {
-                trace!(%chunk_id, ?range, "Downloading chunk");
-                let permit = self.request_semaphore.acquire().await?;
-                let chunk = self
-                    .storage
-                    .fetch_chunk(&self.storage_settings, chunk_id, range)
-                    .await?;
+        let storage = Arc::clone(&self.storage);
+        let settings = self.storage_settings.clone();
+        let semaphore = &self.request_semaphore;
+        let chunk_id_clone = chunk_id.clone();
+        let range_clone = range.clone();
+
+        let chunk = self.chunk_cache.get_or_fetch(
+            key,
+            Box::pin(async move {
+                trace!(%chunk_id_clone, ?range_clone, "Downloading chunk");
+                let permit = semaphore.acquire().await
+                    .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
+                let chunk = storage
+                    .fetch_chunk(&settings, &chunk_id_clone, &range_clone)
+                    .await
+                    .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
                 drop(permit);
-                let _fail_is_ok = guard.insert(chunk.clone());
                 Ok(chunk)
+            })
+        )
+        .await
+        .map_err(|e| {
+            match e.downcast::<RepositoryError>() {
+                Ok(repo_err) => *repo_err,
+                Err(other_err) => RepositoryError::new(RepositoryErrorKind::StorageError(
+                    crate::storage::StorageErrorKind::Other(other_err.to_string())
+                )),
             }
-        }
+        })?;
+
+        Ok(chunk)
     }
 
     /// Returns the sequence of parents of the current session, in order of latest first.
@@ -773,32 +860,6 @@ async fn fetch_transaction_log(
         .map(Arc::new)
 }
 
-#[derive(Debug, Clone)]
-struct FileWeighter;
-
-impl Weighter<ManifestId, Arc<Manifest>> for FileWeighter {
-    fn weight(&self, _: &ManifestId, val: &Arc<Manifest>) -> u64 {
-        val.len() as u64
-    }
-}
-
-impl Weighter<SnapshotId, Arc<Snapshot>> for FileWeighter {
-    fn weight(&self, _: &SnapshotId, val: &Arc<Snapshot>) -> u64 {
-        val.len() as u64
-    }
-}
-
-impl Weighter<(ChunkId, Range<ChunkOffset>), Bytes> for FileWeighter {
-    fn weight(&self, _: &(ChunkId, Range<ChunkOffset>), val: &Bytes) -> u64 {
-        val.len() as u64
-    }
-}
-
-impl Weighter<SnapshotId, Arc<TransactionLog>> for FileWeighter {
-    fn weight(&self, _: &SnapshotId, val: &Arc<TransactionLog>) -> u64 {
-        val.len() as u64
-    }
-}
 
 #[cfg(test)]
 #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
 mod test {
@@ -937,6 +998,7 @@ mod test {
                 num_transaction_changes: Some(0),
                 num_bytes_attributes: Some(0),
                 num_bytes_chunks: Some(0),
+                backend: crate::cache::CacheBackend::Ephemeral,
             },
             1,
             100,
diff --git a/icechunk/src/cache/ephemeral.rs b/icechunk/src/cache/ephemeral.rs
new file mode 100644
index 000000000..4a59bd220
--- /dev/null
+++ b/icechunk/src/cache/ephemeral.rs
@@ -0,0 +1,159 @@
+//! Ephemeral (in-memory) cache backend implementation.
+//!
+//! This module provides an in-memory LRU cache with weight-based eviction using
+//! the `quick_cache` library. This is the default cache backend for Icechunk.
+
+use bytes::Bytes;
+use quick_cache::{Weighter, sync::Cache};
+use std::future::Future;
+use std::hash::Hash;
+use std::pin::Pin;
+use std::sync::Arc;
+
+use super::StorageCache;
+use crate::format::{manifest::Manifest, snapshot::Snapshot, transaction_log::TransactionLog};
+
+/// A trait for types that can provide their byte size for cache weighting.
+///
+/// This trait is used to determine how much "weight" each cached item has,
+/// which determines how many items can fit in the cache before eviction occurs.
+pub trait Weighable {
+    /// Returns the weight of this item in bytes.
+    ///
+    /// For most types, this is the actual byte size of the data structure.
+    fn weight(&self) -> u64;
+}
+
+// Implement Weighable for the types used in AssetManager
+impl Weighable for Arc<Manifest> {
+    fn weight(&self) -> u64 {
+        self.len() as u64
+    }
+}
+
+impl Weighable for Arc<Snapshot> {
+    fn weight(&self) -> u64 {
+        self.len() as u64
+    }
+}
+
+impl Weighable for Arc<TransactionLog> {
+    fn weight(&self) -> u64 {
+        self.len() as u64
+    }
+}
+
+impl Weighable for Bytes {
+    fn weight(&self) -> u64 {
+        self.len() as u64
+    }
+}
+
+/// An in-memory LRU cache with weight-based eviction.
+///
+/// This cache uses the `quick_cache` library to provide fast, thread-safe
+/// access with automatic eviction based on item weights. Items are evicted
+/// using a Least Recently Used (LRU) policy when the total weight exceeds
+/// the configured capacity.
+///
+/// # Features
+///
+/// - **Weight-based eviction**: Items are weighted by their byte size, allowing
+///   precise control over memory usage.
+/// - **Concurrent request deduplication**: Multiple concurrent requests for the
+///   same uncached key will only trigger one fetch operation.
+/// - **Thread-safe**: Safe to use across multiple async tasks.
+///
+/// # Type Parameters
+///
+/// - `K`: The key type (must be `Hash + Eq + Clone`)
+/// - `V`: The value type (must implement [`Weighable`])
+#[derive(Debug)]
+pub struct EphemeralCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Weighable + Clone + Send + Sync + 'static,
+{
+    cache: Cache<K, V, GenericWeighter>,
+}
+
+impl<K, V> EphemeralCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Weighable + Clone + Send + Sync + 'static,
+{
+    /// Creates a new ephemeral cache with the specified capacity.
+    ///
+    /// # Parameters
+    ///
+    /// - `capacity`: The maximum total weight (in bytes) of all cached items.
+    ///   When this limit is exceeded, the least recently used items are evicted.
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// use icechunk::cache::EphemeralCache;
+    ///
+    /// // Create a cache that can hold up to 1GB of data
+    /// let cache = EphemeralCache::new(1_000_000_000);
+    /// ```
+    pub fn new(capacity: u64) -> Self {
+        Self {
+            cache: Cache::with_weighter(
+                capacity.try_into().unwrap_or(usize::MAX),
+                capacity,
+                GenericWeighter,
+            ),
+        }
+    }
+}
+
+impl<K, V> StorageCache<K, V> for EphemeralCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + std::fmt::Debug + 'static,
+    V: Weighable + Clone + Send + Sync + std::fmt::Debug + 'static,
+{
+    fn get_or_fetch<'a>(
+        &'a self,
+        key: K,
+        fetcher: Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>,
+    ) -> Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>> {
+        Box::pin(async move {
+            match self.cache.get_value_or_guard_async(&key).await {
+                Ok(value) => Ok(value),
+                Err(guard) => {
+                    let value = fetcher.await?;
+                    let _ = guard.insert(value.clone());
+                    Ok(value)
+                }
+            }
+        })
+    }
+
+    fn insert(&self, key: K, value: V) {
+        self.cache.insert(key, value);
+    }
+
+    fn remove(&self, key: &K) {
+        self.cache.remove(key);
+    }
+
+    fn clear(&self) {
+        self.cache.clear();
+    }
+}
+
+/// A generic weighter that delegates to the Weighable trait.
+#[derive(Debug, Clone)]
+struct GenericWeighter;
+
+impl<K, V> Weighter<K, V> for GenericWeighter
+where
+    K: Hash + Eq,
+    V: Weighable,
+{
+    fn weight(&self, _key: &K, value: &V) -> u64 {
+        value.weight()
+    }
+}
diff --git a/icechunk/src/cache/mod.rs b/icechunk/src/cache/mod.rs
new file mode 100644
index 000000000..08239961e
--- /dev/null
+++ b/icechunk/src/cache/mod.rs
@@ -0,0 +1,138 @@
+//! Pluggable cache backend infrastructure for Icechunk.
+//!
+//! This module provides a trait-based architecture for cache backends that allows
+//! different caching strategies to be used interchangeably. The cache is used by
+//! the [`AssetManager`](crate::asset_manager::AssetManager) to cache manifests,
+//! snapshots, transaction logs, and chunks.
+//!
+//! # Available Backends
+//!
+//! - **Ephemeral** ([`EphemeralCache`]): In-memory LRU cache with weight-based eviction.
+//!   This is the default backend and provides fast access with configurable memory limits.
+//!   Concurrent requests for the same uncached item are automatically deduplicated.
+//!
+//! - **NoOp** ([`NoOpCache`]): A pass-through cache that performs no caching. Useful for
+//!   debugging or when caching is not desired.
+//!
+//! # Examples
+//!
+//! The cache backend is configured via [`CachingConfig`](crate::config::CachingConfig):
+//!
+//! ```rust,no_run
+//! use icechunk::{config::CachingConfig, cache::CacheBackend};
+//!
+//! let config = CachingConfig {
+//!     num_snapshot_nodes: Some(500_000),
+//!     num_chunk_refs: Some(15_000_000),
+//!     backend: CacheBackend::Ephemeral,
+//!     ..Default::default()
+//! };
+//! ```
+//!
+//! # Implementing Custom Backends
+//!
+//! To implement a custom cache backend (e.g., Redis, filesystem), implement the
+//! [`StorageCache`] trait and add a new variant to [`CacheBackend`].
+
+use serde::{Deserialize, Serialize};
+use std::future::Future;
+use std::hash::Hash;
+use std::pin::Pin;
+
+pub mod ephemeral;
+pub mod noop;
+
+pub use ephemeral::EphemeralCache;
+pub use noop::NoOpCache;
+
+/// A trait for pluggable cache backends that support async operations,
+/// weight-based eviction, and concurrent request deduplication.
+///
+/// This trait is object-safe (uses boxed futures) to allow dynamic dispatch
+/// through `Arc<dyn StorageCache<K, V>>`.
+///
+/// # Type Parameters
+///
+/// - `K`: The cache key type (must be hashable and cloneable)
+/// - `V`: The cached value type (must be cloneable)
+///
+/// # Thread Safety
+///
+/// Implementations must be `Send + Sync` to allow use across async tasks.
+pub trait StorageCache<K, V>: Send + Sync + std::fmt::Debug
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Clone + Send + Sync + 'static,
+{
+    /// Gets a value from the cache, or fetches it using the provided future if not present.
+    ///
+    /// Concurrent requests for the same key should be deduplicated - only one fetch
+    /// operation should occur, with all waiters receiving the same result.
+    ///
+    /// # Parameters
+    ///
+    /// - `key`: The cache key to look up
+    /// - `fetcher`: An async future that fetches the value if it is not in the cache
+    ///
+    /// # Returns
+    ///
+    /// The cached or fetched value, or an error if the fetch operation fails.
+    fn get_or_fetch<'a>(
+        &'a self,
+        key: K,
+        fetcher: Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>,
+    ) -> Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>;
+
+    /// Inserts a value into the cache with the given key.
+    ///
+    /// If the key already exists, the value is updated. If the cache is full,
+    /// the implementation determines which entry to evict.
+    fn insert(&self, key: K, value: V);
+
+    /// Removes a value from the cache.
+    ///
+    /// If the key doesn't exist, this is a no-op.
+    fn remove(&self, key: &K);
+
+    /// Clears all entries from the cache.
+    fn clear(&self);
+}
+
+/// Enum representing different cache backend implementations.
+///
+/// This enum is used in [`CachingConfig`](crate::config::CachingConfig) to select
+/// which cache backend to use for the repository.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum CacheBackend {
+    /// In-memory LRU cache with weight-based eviction (default).
+    ///
+    /// Provides fast access with configurable memory limits. Cache entries are
+    /// evicted based on their size (weight) using an LRU policy. This backend
+    /// automatically deduplicates concurrent requests for the same uncached item.
+    ///
+    /// This is the default backend and provides the best performance for most use cases.
+    Ephemeral,
+
+    /// No-op cache that performs no caching.
+    ///
+    /// All requests are passed directly to the storage backend without caching.
+    /// Useful for debugging, testing, or when caching overhead is not desired.
+    NoOp,
+}
+
+impl Default for CacheBackend {
+    fn default() -> Self {
+        CacheBackend::Ephemeral
+    }
+}
+
+impl CacheBackend {
+    /// Returns a human-readable name for the cache backend.
+    pub fn name(&self) -> &'static str {
+        match self {
+            CacheBackend::Ephemeral => "ephemeral",
+            CacheBackend::NoOp => "noop",
+        }
+    }
+}
diff --git a/icechunk/src/cache/noop.rs b/icechunk/src/cache/noop.rs
new file mode 100644
index 000000000..3062525b3
--- /dev/null
+++ b/icechunk/src/cache/noop.rs
@@ -0,0 +1,83 @@
+//! No-op cache backend implementation.
+//!
+//! This module provides a cache implementation that performs no caching at all.
+//! All requests are passed directly through to the underlying storage backend.
+
+use std::future::Future;
+use std::hash::Hash;
+use std::marker::PhantomData;
+use std::pin::Pin;
+
+use super::StorageCache;
+
+/// A no-op cache that performs no caching.
+///
+/// This cache implementation is a pass-through that never stores any values.
+/// All requests are forwarded directly to the fetcher future, making it
+/// equivalent to having no cache at all.
+///
+/// # Use Cases
+///
+/// - **Debugging**: Disable caching to test storage backend performance
+/// - **Testing**: Ensure tests exercise the full storage path
+/// - **Low-memory environments**: Avoid cache overhead when memory is constrained
+///
+/// # Type Parameters
+///
+/// - `K`: The key type (not actually used, but required for trait compatibility)
+/// - `V`: The value type (not actually used, but required for trait compatibility)
+#[derive(Debug)]
+pub struct NoOpCache<K, V> {
+    _phantom: PhantomData<(K, V)>,
+}
+
+impl<K, V> NoOpCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Clone + Send + Sync + 'static,
+{
+    /// Creates a new no-op cache.
+    pub fn new() -> Self {
+        Self {
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<K, V> Default for NoOpCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + 'static,
+    V: Clone + Send + Sync + 'static,
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<K, V> StorageCache<K, V> for NoOpCache<K, V>
+where
+    K: Hash + Eq + Clone + Send + Sync + std::fmt::Debug + 'static,
+    V: Clone + Send + Sync + std::fmt::Debug + 'static,
+{
+    fn get_or_fetch<'a>(
+        &'a self,
+        _key: K,
+        fetcher: Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>,
+    ) -> Pin<Box<dyn Future<Output = Result<V, Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>> {
+        // Always fetch, never cache
+        fetcher
+    }
+
+    fn insert(&self, _key: K, _value: V) {
+        // No-op
+    }
+
+    fn remove(&self, _key: &K) {
+        // No-op
+    }
+
+    fn clear(&self) {
+        // No-op
+    }
+}
diff --git a/icechunk/src/config.rs b/icechunk/src/config.rs
index 659e7ecc1..3f5775dd8 100644
--- a/icechunk/src/config.rs
+++ b/icechunk/src/config.rs
@@ -91,13 +91,73 @@ impl CompressionConfig {
     }
 }
 
+/// Configuration for the repository's caching behavior.
+///
+/// Icechunk caches various metadata and data to improve performance. This struct
+/// controls the size limits and backend type for the cache.
+///
+/// # Cache Types
+///
+/// Icechunk maintains separate caches for different data types:
+///
+/// - **Snapshot nodes**: Cached based on number of nodes
+/// - **Chunk references**: Cached based on number of references in manifests
+/// - **Transaction changes**: Cached based on number of changes
+/// - **Attributes**: Cached based on total bytes
+/// - **Chunks**: Cached based on total bytes
+///
+/// # Cache Backend
+///
+/// The `backend` field selects which cache implementation to use:
+///
+/// - [`CacheBackend::Ephemeral`](crate::cache::CacheBackend::Ephemeral) (default):
+///   In-memory LRU cache with weight-based eviction
+/// - [`CacheBackend::NoOp`](crate::cache::CacheBackend::NoOp):
+///   Disables caching entirely
+///
+/// # Examples
+///
+/// ```rust
+/// use icechunk::config::CachingConfig;
+/// use icechunk::cache::CacheBackend;
+///
+/// // Create a config with default values
+/// let config = CachingConfig::default();
+///
+/// // Create a custom config
+/// let config = CachingConfig {
+///     num_snapshot_nodes: Some(1_000_000),
+///     num_chunk_refs: Some(20_000_000),
+///     backend: CacheBackend::Ephemeral,
+///     ..Default::default()
+/// };
+/// ```
 #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Default)]
 pub struct CachingConfig {
+    /// Maximum number of snapshot nodes to cache.
+    /// Default: 500,000
     pub num_snapshot_nodes: Option<u64>,
+
+    /// Maximum number of chunk references to cache from manifests.
+    /// Default: 15,000,000
     pub num_chunk_refs: Option<u64>,
+
+    /// Maximum number of transaction changes to cache.
+    /// Default: 0 (disabled)
     pub num_transaction_changes: Option<u64>,
+
+    /// Maximum bytes of attribute data to cache.
+    /// Default: 0 (disabled)
     pub num_bytes_attributes: Option<u64>,
+
+    /// Maximum bytes of chunk data to cache.
+    /// Default: 0 (disabled)
     pub num_bytes_chunks: Option<u64>,
+
+    /// The cache backend implementation to use.
+    /// Default: [`CacheBackend::Ephemeral`](crate::cache::CacheBackend::Ephemeral)
+    #[serde(default)]
+    pub backend: crate::cache::CacheBackend,
 }
 
 impl CachingConfig {
@@ -117,6 +177,10 @@ impl CachingConfig {
         self.num_bytes_chunks.unwrap_or(0)
     }
 
+    pub fn backend(&self) -> crate::cache::CacheBackend {
+        self.backend
+    }
+
     pub fn merge(&self, other: Self) -> Self {
         Self {
             num_snapshot_nodes: other.num_snapshot_nodes.or(self.num_snapshot_nodes),
@@ -128,6 +192,7 @@ impl CachingConfig {
                 .num_bytes_attributes
                 .or(self.num_bytes_attributes),
             num_bytes_chunks: other.num_bytes_chunks.or(self.num_bytes_chunks),
+            backend: other.backend,
         }
     }
 }
diff --git a/icechunk/src/lib.rs b/icechunk/src/lib.rs
index 31aef15f6..f12d28bdd 100644
--- a/icechunk/src/lib.rs
+++ b/icechunk/src/lib.rs
@@ -18,6 +18,7 @@
 //! - The datastructures are represented by concrete types in the [`mod@format`] modules.
 //!   These datastructures use Arrow RecordBatches for representation.
 pub mod asset_manager;
+pub mod cache;
 pub mod change_set;
 pub mod cli;
 pub mod config;
diff --git a/icechunk/src/strategies.rs b/icechunk/src/strategies.rs
index aca635f27..ed658ecd0 100644
--- a/icechunk/src/strategies.rs
+++ b/icechunk/src/strategies.rs
@@ -158,6 +158,7 @@ prop_compose! {
             num_transaction_changes,
             num_bytes_attributes,
             num_bytes_chunks,
+            backend: crate::cache::CacheBackend::Ephemeral,
         }
     }
 }
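
For readers skimming the `AssetManager` changes, the calling convention is: hand `get_or_fetch` a pinned, boxed future and box any failure into `Box<dyn std::error::Error + Send + Sync>`; the caller then downcasts the error back. Below is a minimal usage sketch against the new `EphemeralCache`, not part of the patch; it assumes a Tokio runtime with the `macros` and `rt` features and relies on the `Bytes` weighting shipped in `cache/ephemeral.rs`.

```rust
use std::sync::Arc;

use bytes::Bytes;
use icechunk::cache::{EphemeralCache, StorageCache};

#[tokio::main]
async fn main() {
    // Weight budget of ~1 MiB; Bytes values are weighted by their length.
    let cache: Arc<dyn StorageCache<String, Bytes>> =
        Arc::new(EphemeralCache::new(1024 * 1024));

    // The fetcher future only runs on a cache miss; concurrent misses for the
    // same key are deduplicated by the ephemeral backend.
    let value = cache
        .get_or_fetch(
            "chunk-0".to_string(),
            Box::pin(async {
                // Stand-in for a storage read. Real callers box their error, e.g.
                // `.map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)`.
                Ok::<_, Box<dyn std::error::Error + Send + Sync>>(Bytes::from_static(b"hello"))
            }),
        )
        .await
        .expect("fetch failed");

    assert_eq!(value, Bytes::from_static(b"hello"));
}
```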
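The `cache/mod.rs` docs point at custom backends (Redis, filesystem, ...) but the patch only ships `Ephemeral` and `NoOp`. The sketch below is illustrative only: `MapCache`, `FetchResult`, and `BoxedFetch` are hypothetical names, the map grows without bound, it does not deduplicate concurrent fetches the way `EphemeralCache` does, and it assumes the boxed-future `StorageCache` signature declared in `cache/mod.rs`. A real backend would also need its own `CacheBackend` variant and wiring in `AssetManager::new`.

```rust
use std::collections::HashMap;
use std::future::Future;
use std::hash::Hash;
use std::pin::Pin;
use std::sync::Mutex;

use icechunk::cache::StorageCache;

// Aliases only to keep the signatures readable; they resolve to the trait's own types.
type FetchResult<V> = Result<V, Box<dyn std::error::Error + Send + Sync>>;
type BoxedFetch<'a, V> = Pin<Box<dyn Future<Output = FetchResult<V>> + Send + 'a>>;

/// A hypothetical, unbounded map-backed cache, for illustration only.
#[derive(Debug)]
pub struct MapCache<K, V> {
    entries: Mutex<HashMap<K, V>>,
}

impl<K, V> MapCache<K, V> {
    /// Creates an empty map-backed cache.
    pub fn new() -> Self {
        Self { entries: Mutex::new(HashMap::new()) }
    }
}

impl<K, V> StorageCache<K, V> for MapCache<K, V>
where
    K: Hash + Eq + Clone + Send + Sync + std::fmt::Debug + 'static,
    V: Clone + Send + Sync + std::fmt::Debug + 'static,
{
    fn get_or_fetch<'a>(&'a self, key: K, fetcher: BoxedFetch<'a, V>) -> BoxedFetch<'a, V> {
        Box::pin(async move {
            // Serve from the map when possible; the lock is released before awaiting.
            let cached = self.entries.lock().unwrap().get(&key).cloned();
            if let Some(hit) = cached {
                return Ok(hit);
            }
            // Otherwise run the fetcher and remember the value (no eviction, no dedup).
            let value = fetcher.await?;
            self.entries.lock().unwrap().insert(key, value.clone());
            Ok(value)
        })
    }

    fn insert(&self, key: K, value: V) {
        self.entries.lock().unwrap().insert(key, value);
    }

    fn remove(&self, key: &K) {
        self.entries.lock().unwrap().remove(key);
    }

    fn clear(&self) {
        self.entries.lock().unwrap().clear();
    }
}
```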