diff --git a/Cargo.toml b/Cargo.toml
index eda17fbc..63a2235e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,6 +7,7 @@ members = [
"data-pdf",
"data-resource",
"fs-atomic-versions",
+ "fs-cache",
"fs-atomic-light",
"fs-metadata",
"fs-properties",
@@ -23,6 +24,7 @@ default-members = [
"data-pdf",
"data-resource",
"fs-atomic-versions",
+ "fs-cache",
"fs-atomic-light",
"fs-metadata",
"fs-properties",
diff --git a/README.md b/README.md
index 8a472b50..4f3dbfd7 100644
--- a/README.md
+++ b/README.md
@@ -28,18 +28,19 @@ The core crate is `fs-index` which provides us with [content addressing](https:/
-| Package | Description |
-| --------------- | ---------------------------------------- |
-| `ark-cli` | The CLI tool to interact with ark crates |
-| `data-resource` | Resource hashing and ID construction |
-| `fs-index` | Resource Index construction and updating |
-| `fs-storage` | Filesystem storage for resources |
-| `fs-metadata` | Metadata management |
-| `fs-properties` | Properties management |
-| `data-link` | Linking resources |
-| `data-pdf` | PDF handling |
-| `data-error` | Error handling |
-| `data-json` | JSON serialization and deserialization |
+| Package | Description |
+| --------------- | ---------------------------------------- |
+| `ark-cli` | The CLI tool to interact with ARK crates |
+| `data-resource` | Resource hashing and ID construction |
+| `fs-cache` | Memory and disk caching with LRU eviction |
+| `fs-index` | Resource Index construction and updating |
+| `fs-storage` | Key-value storage persisted on filesystem |
+| `fs-metadata` | Metadata management |
+| `fs-properties` | Properties management |
+| `data-link` | Linking resources |
+| `data-pdf` | PDF handling |
+| `data-error` | Error handling |
+| `data-json` | JSON serialization and deserialization |
diff --git a/ark-cli/src/util.rs b/ark-cli/src/util.rs
index 40eff290..c5db3536 100644
--- a/ark-cli/src/util.rs
+++ b/ark-cli/src/util.rs
@@ -179,10 +179,9 @@ pub fn translate_storage(
root: &Option<PathBuf>,
storage: &str,
) -> Option<(PathBuf, Option<StorageType>)> {
- if let Ok(path) = PathBuf::from_str(storage) {
- if path.exists() && path.is_dir() {
- return Some((path, None));
- }
+ let Ok(path) = PathBuf::from_str(storage);
+ if path.exists() && path.is_dir() {
+ return Some((path, None));
}
match storage.to_lowercase().as_str() {
diff --git a/data-link/src/lib.rs b/data-link/src/lib.rs
index ba925722..42de634f 100644
--- a/data-link/src/lib.rs
+++ b/data-link/src/lib.rs
@@ -136,7 +136,7 @@ impl Link {
pub fn get_preview_synced(&self) -> Result<OpenGraph> {
let runtime =
tokio::runtime::Runtime::new().expect("Unable to create a runtime");
- return runtime.block_on(self.get_preview());
+ runtime.block_on(self.get_preview())
}
/// Get OGP metadata of the link.
diff --git a/fs-atomic-versions/src/atomic/file.rs b/fs-atomic-versions/src/atomic/file.rs
index a2b708ab..4947b77e 100644
--- a/fs-atomic-versions/src/atomic/file.rs
+++ b/fs-atomic-versions/src/atomic/file.rs
@@ -126,7 +126,7 @@ impl AtomicFile {
/// Return the latest version together with vector of the
/// files matching this version. Multiple files for the same version
- /// can appear due to usage of file syncronization. Different devices
+ /// can appear due to usage of file synchronization. Different devices
/// can create same version simultaneously.
pub fn latest_version(&self) -> Result<(usize, Vec<PathBuf>)> {
let files_iterator = fs::read_dir(&self.directory)?.flatten();
diff --git a/fs-cache/Cargo.toml b/fs-cache/Cargo.toml
new file mode 100644
index 00000000..543f8a69
--- /dev/null
+++ b/fs-cache/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "fs-cache"
+version = "0.1.0"
+edition = "2021"
+
+[lib]
+name = "fs_cache"
+crate-type = ["rlib", "cdylib"]
+bench = false
+
+[dependencies]
+log = { version = "0.4.17", features = ["release_max_level_off"] }
+data-error = { path = "../data-error" }
+fs-storage = { path = "../fs-storage" }
+fs-atomic-light = { path = "../fs-atomic-light" }
+lru = "0.12.5"
+
+[dev-dependencies]
+tempdir = "0.3.7"
diff --git a/fs-cache/src/cache.rs b/fs-cache/src/cache.rs
new file mode 100644
index 00000000..3b0824eb
--- /dev/null
+++ b/fs-cache/src/cache.rs
@@ -0,0 +1,761 @@
+use std::fs;
+use std::num::NonZeroUsize;
+use std::path::{Path, PathBuf};
+
+use data_error::{ArklibError, Result};
+use fs_atomic_light::temp_and_move;
+use fs_storage::utils::extract_key_from_file_path;
+use lru::LruCache;
+
+/// A cache entry that stores a value and its size in bytes.
+///
+/// This structure is used to track both the actual data (value)
+/// and its memory usage (size) in the cache.
+struct CacheEntry<V> {
+ value: V,
+ size: usize,
+}
+
+/// A combined in-memory and disk-based cache system.
+///
+/// This cache uses an LRU (Least Recently Used) eviction policy for the
+/// in-memory portion and persists data to disk for long-term storage.
+pub struct Cache<K, V> {
+ /// Label for logging
+ label: String,
+ /// Path to the underlying folder where data is persisted
+ path: PathBuf,
+ /// An in-memory LRU cache for quick access to frequently used items.
+ memory_cache: LruCache>,
+ /// The current memory usage in bytes.
+ current_memory_bytes: usize,
+ /// The maximum allowable memory usage in bytes.
+ max_memory_bytes: usize,
+}
+
+impl<K, V> Cache<K, V>
+where
+ K: Ord
+ + Clone
+ + std::fmt::Display
+ + std::hash::Hash
+ + std::str::FromStr
+ + std::fmt::Debug,
+ V: Clone + std::fmt::Debug + AsRef<[u8]> + From<Vec<u8>>,
+{
+ /// Creates a new cache instance.
+ ///
+ /// # Arguments
+ /// * `label` - Identifier used in logs
+ /// * `path` - Directory where cache files are stored
+ /// * `max_memory_bytes` - Maximum bytes to keep in memory
+ /// * `preload_cache` - Whether to pre-load the cache from disk on initialization
+ pub fn new(
+ label: String,
+ path: &Path,
+ max_memory_bytes: usize,
+ preload_cache: bool,
+ ) -> Result<Self> {
+ Self::validate_path(path, &label)?;
+
+ let memory_cache = LruCache::new(
+ NonZeroUsize::new(max_memory_bytes)
+ .expect("Capacity can't be zero"),
+ );
+
+ let mut cache = Self {
+ label: label.clone(),
+ path: PathBuf::from(path),
+ memory_cache,
+ current_memory_bytes: 0,
+ max_memory_bytes,
+ };
+
+ log::debug!(
+ "cache/{}: initialized with {} bytes limit",
+ label,
+ max_memory_bytes
+ );
+
+ if preload_cache {
+ cache.load_fs()?;
+ }
+ Ok(cache)
+ }
+
+ /// Retrieves a value by its key, checking memory first then disk.
+ /// Returns None if the key doesn't exist.
+ pub fn get(&mut self, key: &K) -> Option<V> {
+ log::debug!("cache/{}: retrieving value for key {}", self.label, key);
+
+ if let Some(v) = self.fetch_from_memory(key) {
+ log::debug!(
+ "cache/{}: value for key {} retrieved from memory",
+ self.label,
+ key
+ );
+ return Some(v);
+ }
+ if let Some(v) = self.fetch_from_disk(key) {
+ log::debug!(
+ "cache/{}: value for key {} retrieved from disk",
+ self.label,
+ key
+ );
+ return Some(v);
+ }
+
+ log::warn!("cache/{}: no value found for key {}", self.label, key);
+ None
+ }
+
+ /// Stores a new value with the given key.
+ /// Returns an error if the key already exists or if writing fails.
+ pub fn set(&mut self, key: K, value: V) -> Result<()> {
+ log::debug!("cache/{}: setting value for key {}", self.label, key);
+
+ // Check if value already exists
+ if self.exists(&key) {
+ return Err(ArklibError::Storage(
+ self.label.clone(),
+ format!("Key {} already exists in cache", key),
+ ));
+ }
+
+ // Always write to disk first
+ self.persist_to_disk(&key, &value)?;
+
+ // Then update memory cache
+ self.update_memory_cache(&key, &value)?;
+
+ log::debug!("cache/{}: set key={}", self.label, key);
+ Ok(())
+ }
+
+ /// Checks if a value exists either in memory or on disk.
+ pub fn exists(&self, key: &K) -> bool {
+ self.memory_cache.contains(key)
+ || self.path.join(key.to_string()).exists()
+ }
+
+ /// Returns an iterator over all cached keys.
+ /// The keys are returned in filesystem directory order,
+ /// which should not be relied upon for any specific ordering.
+ pub fn keys(&self) -> Result<impl Iterator<Item = K>> {
+ let entries = fs::read_dir(&self.path).map_err(|e| {
+ ArklibError::Storage(self.label.clone(), e.to_string())
+ })?;
+
+ let keys: Vec<K> = entries
+ .filter_map(|entry| match entry {
+ Ok(entry) => {
+ let path = entry.path();
+ if path.is_file() {
+ extract_key_from_file_path(&self.label, &path, true)
+ .ok()
+ } else {
+ None
+ }
+ }
+ Err(_) => None,
+ })
+ .collect();
+
+ Ok(keys.into_iter())
+ }
+
+ // Internal methods
+ /// Initializes the memory cache by loading the most recently modified files up to the memory limit.
+ ///
+ /// First collects metadata for all files, sorts them by modification time, and then loads as many
+ /// recent files as possible within the memory limit. Files that don't fit in memory remain only
+ /// on disk.
+ fn load_fs(&mut self) -> Result<()> {
+ // Collect metadata for all files
+ let mut file_metadata = Vec::new();
+ for entry in fs::read_dir(&self.path)? {
+ let entry = entry?;
+ let path = entry.path();
+ if path.is_file() {
+ let key: K =
+ extract_key_from_file_path(&self.label, &path, true)?;
+ let metadata = entry.metadata()?;
+ let modified = metadata.modified()?;
+ let size = metadata.len() as usize;
+ file_metadata.push((key, size, modified));
+ }
+ }
+
+ // Sort by modified time (most recent first)
+ file_metadata.sort_by(|a, b| b.2.cmp(&a.2));
+
+ // Clear existing cache
+ self.memory_cache.clear();
+ self.current_memory_bytes = 0;
+
+ // Load files that fit in memory
+ let mut loaded_bytes = 0;
+ let total_bytes: usize = file_metadata
+ .iter()
+ .map(|(_, size, _)| size)
+ .sum();
+
+ for (key, size, _) in file_metadata {
+ if loaded_bytes + size <= self.max_memory_bytes {
+ match self.read_from_disk(&key) {
+ Ok(value) => {
+ self.memory_cache
+ .put(key, CacheEntry { value, size });
+ loaded_bytes += size;
+ }
+ Err(err) => {
+ log::warn!(
+ "cache/{}: failed to load key={}: {}",
+ self.label,
+ key,
+ err
+ );
+ }
+ }
+ }
+ }
+
+ self.current_memory_bytes = loaded_bytes;
+
+ log::debug!(
+ "cache/{}: loaded {}/{} bytes in memory",
+ self.label,
+ self.current_memory_bytes,
+ total_bytes
+ );
+
+ Ok(())
+ }
+
+ /// Validates the provided path.
+ ///
+ /// # Arguments
+ /// * `path` - The path to validate
+ /// * `label` - Identifier used in logs
+ fn validate_path(path: &Path, label: &str) -> Result<()> {
+ if !path.exists() {
+ return Err(ArklibError::Storage(
+ label.to_owned(),
+ "Folder does not exist".to_owned(),
+ ));
+ }
+
+ if !path.is_dir() {
+ return Err(ArklibError::Storage(
+ label.to_owned(),
+ "Path is not a directory".to_owned(),
+ ));
+ }
+
+ Ok(())
+ }
+
+ /// Retrieves a value from the memory cache.
+ fn fetch_from_memory(&mut self, key: &K) -> Option<V> {
+ self.memory_cache
+ .get(key)
+ .map(|entry| entry.value.clone())
+ }
+
+ /// Retrieves a value from disk and caches it in memory if possible.
+ fn fetch_from_disk(&mut self, key: &K) -> Option<V> {
+ let file_path = self.path.join(key.to_string());
+ if !file_path.exists() {
+ log::warn!("cache/{}: no value found for key {}", self.label, key);
+ return None;
+ }
+
+ match self.read_from_disk(key) {
+ Ok(value) => {
+ if let Err(err) = self.update_memory_cache(key, &value) {
+ log::error!(
+ "cache/{}: failed to add to memory cache for key {}: {}",
+ self.label,
+ key,
+ err
+ );
+ return None;
+ }
+ Some(value)
+ }
+ Err(err) => {
+ log::error!(
+ "cache/{}: failed to load from disk for key {}: {}",
+ self.label,
+ key,
+ err
+ );
+ None
+ }
+ }
+ }
+
+ /// Writes a value to disk using atomic operations.
+ fn persist_to_disk(&mut self, key: &K, value: &V) -> Result<()> {
+ log::debug!("cache/{}: writing to disk for key {}", self.label, key);
+
+ if !self.path.exists() {
+ return Err(ArklibError::Storage(
+ self.label.clone(),
+ format!(
+ "Cache directory does not exist: {}",
+ self.path.display()
+ ),
+ ));
+ }
+
+ let file_path = self.path.join(key.to_string());
+ debug_assert!(
+ !file_path.exists(),
+ "File {} should not exist before writing",
+ file_path.display()
+ );
+
+ temp_and_move(value.as_ref(), &self.path, &key.to_string()).map_err(
+ |err| {
+ ArklibError::Storage(
+ self.label.clone(),
+ format!("Failed to write value for key {}: {}", key, err),
+ )
+ },
+ )
+ }
+
+ /// Reads a value from disk.
+ fn read_from_disk(&self, key: &K) -> Result<V>
+ where
+ V: From<Vec<u8>>, // Add trait bound for reading binary data
+ {
+ let file_path = self.path.join(key.to_string());
+ let contents = fs::read(&file_path)?;
+ Ok(V::from(contents))
+ }
+
+ /// Returns the size of a value in bytes.
+ ///
+ /// First checks the memory cache for size information,
+ /// falls back to checking the file size on disk if not found in memory.
+ fn get_file_size(&self, key: &K) -> Result<usize> {
+ if let Some(entry) = self.memory_cache.peek(key) {
+ return Ok(entry.size);
+ }
+ Ok(fs::metadata(self.path.join(key.to_string()))?.len() as usize)
+ }
+
+ /// Adds or updates a value in the memory cache, evicting old entries if needed.
+ /// Logs an error and skips memory caching if the value is larger than the maximum memory limit.
+ fn update_memory_cache(&mut self, key: &K, value: &V) -> Result<()> {
+ log::debug!("cache/{}: caching in memory for key {}", self.label, key);
+ let size = self.get_file_size(key)?;
+
+ // If single value is larger than total limit, just skip memory caching
+ if size > self.max_memory_bytes {
+ log::error!(
+ "cache/{}: value size {} exceeds limit {}",
+ self.label,
+ size,
+ self.max_memory_bytes
+ );
+ return Ok(());
+ }
+
+ // Remove oldest entries until we have space for new value
+ while self.current_memory_bytes + size > self.max_memory_bytes {
+ if let Some((_, old_entry)) = self.memory_cache.pop_lru() {
+ if self.current_memory_bytes < old_entry.size {
+ log::error!(
+ "cache/{}: Memory tracking inconsistency - current: {}, entry size: {}",
+ self.label,
+ self.current_memory_bytes,
+ old_entry.size
+ );
+ self.current_memory_bytes = 0;
+ } else {
+ self.current_memory_bytes -= old_entry.size;
+ }
+ }
+ }
+
+ // Add new value and update size
+ if let Some((evicted_key, _)) = self.memory_cache.push(
+ key.clone(),
+ CacheEntry {
+ value: value.clone(),
+ size,
+ },
+ ) {
+ log::debug!(
+ "cache/{}: evicted key {} from memory cache",
+ self.label,
+ evicted_key
+ );
+ }
+
+ self.current_memory_bytes += size;
+
+ log::debug!(
+ "cache/{}: added {} bytes, total {}/{}",
+ self.label,
+ size,
+ self.current_memory_bytes,
+ self.max_memory_bytes
+ );
+
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::{
+ fs::File,
+ io::Write,
+ time::{Duration, SystemTime},
+ };
+ use tempdir::TempDir;
+
+ /// Helper function to create a temporary directory
+ fn create_temp_dir() -> TempDir {
+ TempDir::new("tmp").expect("Failed to create temporary directory")
+ }
+
+ /// Helper function to create a test cache with default settings
+ fn create_test_cache(temp_dir: &TempDir) -> Cache<String, Vec<u8>> {
+ Cache::new(
+ "test".to_string(),
+ temp_dir.path(),
+ 1024 * 1024, // 1MB
+ true, // Enable preloading by default
+ )
+ .expect("Failed to create cache")
+ }
+
+ #[test]
+ fn test_new_cache() {
+ let temp_dir = create_temp_dir();
+ let cache = create_test_cache(&temp_dir);
+ assert_eq!(cache.current_memory_bytes, 0);
+ assert_eq!(cache.max_memory_bytes, 1024 * 1024);
+ }
+
+ #[test]
+ fn test_set_and_get() {
+ let temp_dir = create_temp_dir();
+ let mut cache = create_test_cache(&temp_dir);
+ let key = "test_key".to_string();
+ let value = vec![1, 2, 3, 4];
+
+ cache
+ .set(key.clone(), value.clone())
+ .expect("Failed to set value");
+ let retrieved = cache.get(&key).expect("Failed to get value");
+ assert_eq!(retrieved, value);
+ }
+
+ #[test]
+ fn test_exists() {
+ let temp_dir = create_temp_dir();
+ let mut cache = create_test_cache(&temp_dir);
+ let key = "test_key".to_string();
+ let value = vec![1, 2, 3, 4];
+
+ assert!(!cache.exists(&key));
+ cache
+ .set(key.clone(), value)
+ .expect("Failed to set value");
+ assert!(cache.exists(&key));
+ }
+
+ #[test]
+ fn test_get_nonexistent() {
+ let temp_dir = create_temp_dir();
+ let mut cache = create_test_cache(&temp_dir);
+ assert!(cache.get(&"nonexistent".to_string()).is_none());
+ }
+
+ #[test]
+ fn test_keys() {
+ let temp_dir = create_temp_dir();
+ let mut cache = create_test_cache(&temp_dir);
+ let values = vec![
+ ("key1".to_string(), vec![1, 2]),
+ ("key2".to_string(), vec![3, 4]),
+ ("key3".to_string(), vec![5, 6]),
+ ];
+
+ // Add values
+ for (key, data) in values.iter() {
+ cache
+ .set(key.clone(), data.clone())
+ .expect("Failed to set value");
+ }
+
+ // Check keys
+ let mut cache_keys: Vec<_> = cache
+ .keys()
+ .expect("Failed to get keys")
+ .collect();
+ cache_keys.sort();
+ let mut expected_keys: Vec<_> =
+ values.iter().map(|(k, _)| k.clone()).collect();
+ expected_keys.sort();
+
+ assert_eq!(cache_keys, expected_keys);
+ }
+
+ #[test]
+ fn test_memory_eviction() {
+ let temp_dir = create_temp_dir();
+ let mut cache = Cache::new(
+ "test".to_string(),
+ temp_dir.path(),
+ 8, // Very small limit to force eviction
+ true, // Enable preloading by default
+ )
+ .expect("Failed to create cache");
+
+ // Add first value
+ let key1 = "key1.txt".to_string();
+ let value1 = vec![1, 2, 3, 4, 5, 7];
+ cache
+ .set(key1.clone(), value1.clone())
+ .expect("Failed to set value1");
+
+ // Add second value to trigger eviction
+ let key2 = "key2.json".to_string();
+ let value2 = vec![5, 6, 8];
+ cache
+ .set(key2.clone(), value2.clone())
+ .expect("Failed to set value2");
+
+ // First value should be evicted from memory but still on disk
+ assert!(cache.memory_cache.get(&key1).is_none());
+ assert_eq!(cache.get(&key1).unwrap(), value1); // Should load from disk
+ }
+
+ #[test]
+ fn test_large_value_handling() {
+ let temp_dir = create_temp_dir();
+ let mut cache = create_test_cache(&temp_dir);
+ let key = "large_key".to_string();
+ let large_value = vec![0; 2 * 1024 * 1024]; // 2MB, larger than cache
+
+ // Should fail to cache in memory but succeed in writing to disk
+ assert!(cache
+ .set(key.clone(), large_value.clone())
+ .is_ok());
+ assert_eq!(cache.get(&key).unwrap(), large_value); // Should load from disk
+ }
+
+ #[test]
+ fn test_persistence() {
+ let temp_dir = create_temp_dir();
+ let key = "persist_key".to_string();
+ let value = vec![1, 2, 3, 4];
+
+ // Scope for first cache instance
+ {
+ let mut cache =
+ Cache::new("test".to_string(), temp_dir.path(), 1024, true)
+ .expect("Failed to create first cache");
+ cache
+ .set(key.clone(), value.clone())
+ .expect("Failed to set value");
+ }
+
+ // Create new cache instance pointing to same directory
+ let mut cache2 =
+ Cache::new("test".to_string(), temp_dir.path(), 1024, true)
+ .expect("Failed to create second cache");
+
+ // Should be able to read value written by first instance
+ let retrieved: Vec<u8> = cache2.get(&key).expect("Failed to get value");
+ assert_eq!(retrieved, value);
+ }
+
+ #[test]
+ fn test_concurrent_reads() {
+ use std::thread;
+
+ let temp_dir = create_temp_dir();
+ let mut cache = create_test_cache(&temp_dir);
+ let key = "test_key".to_string();
+ let value = vec![1, 2, 3, 4];
+
+ // Set up initial cache with data
+ cache
+ .set(key.clone(), value.clone())
+ .expect("Failed to set value");
+
+ // Create multiple reader caches
+ let mut handles: Vec<thread::JoinHandle<Option<Vec<u8>>>> = vec![];
+ for _ in 0..3 {
+ let key = key.clone();
+ let cache_path = temp_dir.path().to_path_buf();
+
+ handles.push(thread::spawn(move || {
+ let mut reader_cache =
+ Cache::new("test".to_string(), &cache_path, 1024, true)
+ .expect("Failed to create reader cache");
+
+ reader_cache.get(&key)
+ }));
+ }
+
+ // All readers should get the same value
+ for handle in handles {
+ let result = handle.join().expect("Thread panicked");
+ assert_eq!(result.unwrap(), value);
+ }
+ }
+
+ #[test]
+ fn test_duplicate_set() {
+ let temp_dir = create_temp_dir();
+ let mut cache = create_test_cache(&temp_dir);
+ let key = "dup_key".to_string();
+ let value1 = vec![1, 2, 3, 4];
+ let value2 = vec![5, 6, 7, 8];
+
+ // First set
+ cache
+ .set(key.clone(), value1.clone())
+ .expect("Failed to set first value");
+
+ // Second set with the same key should fail
+ assert!(cache.set(key.clone(), value2).is_err());
+
+ // Should still have first value
+ let retrieved = cache.get(&key).expect("Failed to get value");
+ assert_eq!(retrieved, value1);
+ }
+
+ #[test]
+ fn test_loads_recent_files_first() {
+ let temp_dir = create_temp_dir();
+ let mut cache: Cache<String, Vec<u8>> = Cache::new(
+ "test".to_string(),
+ temp_dir.path(),
+ 4, // Small limit to force selection
+ true, // Enable preloading by default
+ )
+ .expect("Failed to create cache");
+
+ // Create files with different timestamps
+ let files = vec![
+ (
+ "old.txt",
+ vec![1, 2, 3],
+ SystemTime::now() - Duration::from_secs(100),
+ ),
+ ("new.txt", vec![3, 4], SystemTime::now()),
+ ];
+
+ for (name, data, time) in files {
+ let path = temp_dir.path().join(name);
+ let mut file = File::create(path).unwrap();
+ file.write_all(&data).unwrap();
+ file.set_modified(time).unwrap();
+ file.sync_all().unwrap();
+ }
+
+ // Reload cache
+ cache.load_fs().expect("Failed to load files");
+
+ // Verify newer file is in memory
+ assert!(cache
+ .memory_cache
+ .contains(&"new.txt".to_string()));
+ assert!(!cache
+ .memory_cache
+ .contains(&"old.txt".to_string()));
+ }
+
+ #[test]
+ #[should_panic(expected = "Capacity can't be zero")]
+ fn test_zero_capacity() {
+ let temp_dir = create_temp_dir();
+ let _cache: std::result::Result<Cache<String, Vec<u8>>, ArklibError> =
+ Cache::new("test".to_string(), temp_dir.path(), 0, true);
+ }
+
+ #[test]
+ fn test_memory_tracking() {
+ let temp_dir = create_temp_dir();
+ let mut cache = create_test_cache(&temp_dir);
+ let key = "track_key".to_string();
+ let value = vec![1, 2, 3, 4]; // 4 bytes
+
+ cache
+ .set(key.clone(), value)
+ .expect("Failed to set value");
+
+ // Memory usage should match file size
+ assert_eq!(cache.current_memory_bytes, 4);
+ }
+
+ #[test]
+ fn test_preload_behavior() {
+ let temp_dir = create_temp_dir();
+
+ // First create and populate a cache
+ let mut initial_cache = Cache::new(
+ "test".to_string(),
+ temp_dir.path(),
+ 1024,
+ false, // Don't preload yet
+ )
+ .expect("Failed to create initial cache");
+
+ // Add some test data
+ let test_data = vec![
+ ("key1".to_string(), vec![1, 2, 3]),
+ ("key2".to_string(), vec![4, 5, 6]),
+ ];
+
+ for (key, value) in &test_data {
+ initial_cache
+ .set(key.clone(), value.clone())
+ .expect("Failed to set value");
+ }
+
+ // Drop the initial cache
+ drop(initial_cache);
+
+ // Create new cache instances with different preload settings
+ let mut no_preload_cache: Cache<String, Vec<u8>> = Cache::new(
+ "test".to_string(),
+ temp_dir.path(),
+ 1024,
+ false, // Don't preload
+ )
+ .expect("Failed to create no-preload cache");
+
+ let mut preload_cache: Cache<String, Vec<u8>> = Cache::new(
+ "test".to_string(),
+ temp_dir.path(),
+ 1024,
+ true, // Do preload
+ )
+ .expect("Failed to create preload cache");
+
+ // Check memory cache state
+ for (key, _) in &test_data {
+ // No-preload cache should not have items in memory
+ assert!(no_preload_cache.memory_cache.get(key).is_none());
+
+ // Preload cache should have items in memory
+ assert!(preload_cache.memory_cache.get(key).is_some());
+
+ // Both should still be able to get the values
+ assert!(no_preload_cache.get(key).is_some());
+ assert!(preload_cache.get(key).is_some());
+ }
+ }
+}
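Below, a minimal usage sketch of the new `Cache` API, mirroring the tests above; the label, directory path, and sizes are illustrative only:

    use std::path::Path;
    use fs_cache::Cache;

    fn demo() -> data_error::Result<()> {
        // The target directory must already exist; `Cache::new` validates this.
        let mut cache: Cache<String, Vec<u8>> = Cache::new(
            "demo".to_string(),
            Path::new("/tmp/fs-cache-demo"),
            1024 * 1024, // 1 MiB in-memory budget
            true,        // preload most recently modified files from disk
        )?;

        cache.set("key1".to_string(), vec![1, 2, 3])?; // persists to disk first
        assert_eq!(cache.get(&"key1".to_string()), Some(vec![1, 2, 3]));
        assert!(cache.exists(&"key1".to_string()));
        Ok(())
    }

Note that `set` rejects duplicate keys, and a value larger than the memory budget is still persisted to disk but skipped by the in-memory LRU.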
diff --git a/fs-cache/src/lib.rs b/fs-cache/src/lib.rs
new file mode 100644
index 00000000..acd91ad5
--- /dev/null
+++ b/fs-cache/src/lib.rs
@@ -0,0 +1,3 @@
+mod cache;
+
+pub use cache::Cache;
diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs
index 6746ac29..e66ef917 100644
--- a/fs-index/src/index.rs
+++ b/fs-index/src/index.rs
@@ -75,7 +75,6 @@ type IndexedPaths = HashSet<Timestamped<PathBuf>>;
/// #### Reactive API
/// - [`ResourceIndex::update_all`]: Method to update the index by rescanning
/// files and returning changes (additions/deletions/updates).
-
///
/// #### Snapshot API
/// - [`ResourceIndex::get_resources_by_id`]: Query resources from the index by
diff --git a/fs-storage/src/file_storage.rs b/fs-storage/src/file_storage.rs
index 2e3332e9..f4676984 100644
--- a/fs-storage/src/file_storage.rs
+++ b/fs-storage/src/file_storage.rs
@@ -1,8 +1,7 @@
use serde::{Deserialize, Serialize};
use std::{
collections::BTreeMap,
- fs::{self, File},
- io::Write,
+ fs,
path::{Path, PathBuf},
time::SystemTime,
};
@@ -10,7 +9,7 @@ use std::{
use crate::{
base_storage::{BaseStorage, SyncStatus},
monoid::Monoid,
- utils::read_version_2_fs,
+ utils::{read_version_2_fs, write_json_file},
};
use data_error::{ArklibError, Result};
@@ -262,13 +261,9 @@ where
)
})?;
fs::create_dir_all(parent_dir)?;
- let mut file = File::create(&self.path)?;
- file.write_all(serde_json::to_string_pretty(&self.data)?.as_bytes())?;
- file.flush()?;
let new_timestamp = SystemTime::now();
- file.set_modified(new_timestamp)?;
- file.sync_all()?;
+ write_json_file(&self.path, &self.data, new_timestamp)?;
self.modified = new_timestamp;
self.written_to_disk = new_timestamp;
diff --git a/fs-storage/src/folder_storage.rs b/fs-storage/src/folder_storage.rs
index 68892a49..e1d147d2 100644
--- a/fs-storage/src/folder_storage.rs
+++ b/fs-storage/src/folder_storage.rs
@@ -1,6 +1,5 @@
use std::collections::BTreeSet;
use std::fs::{self, File};
-use std::io::Write;
use std::time::SystemTime;
use std::{
collections::BTreeMap,
@@ -9,6 +8,7 @@ use std::{
use crate::base_storage::{BaseStorage, SyncStatus};
use crate::monoid::Monoid;
+use crate::utils::{extract_key_from_file_path, write_json_file};
use data_error::{ArklibError, Result};
/// Represents a folder storage system that persists data to disk.
@@ -92,7 +92,8 @@ where
.extension()
.map_or(false, |ext| ext == "json")
{
- let key: K = extract_key_from_file_path(&self.label, &path)?;
+ let key: K =
+ extract_key_from_file_path(&self.label, &path, false)?;
let file = File::open(&path)?;
let value: V =
serde_json::from_reader(file).map_err(|err| {
@@ -280,7 +281,8 @@ where
.extension()
.map_or(false, |ext| ext == "json")
{
- let key = extract_key_from_file_path(&self.label, &path)?;
+ let key =
+ extract_key_from_file_path(&self.label, &path, false)?;
if !self.data.contains_key(&key)
&& !self.deleted_keys.contains(&key)
{
@@ -348,13 +350,9 @@ where
for (key, value) in &self.data {
let file_path = self.path.join(format!("{}.json", key));
- let mut file = File::create(&file_path)?;
- file.write_all(serde_json::to_string_pretty(&value)?.as_bytes())?;
- file.flush()?;
let new_timestamp = SystemTime::now();
- file.set_modified(new_timestamp)?;
- file.sync_all()?;
+ write_json_file(&file_path, &value, new_timestamp)?;
self.timestamps
.insert(key.clone(), (new_timestamp, new_timestamp));
@@ -414,33 +412,6 @@ where
}
}
-fn extract_key_from_file_path<K>(label: &str, path: &Path) -> Result<K>
-where
- K: std::str::FromStr,
-{
- path.file_stem()
- .ok_or_else(|| {
- ArklibError::Storage(
- label.to_owned(),
- "Failed to extract file stem from filename".to_owned(),
- )
- })?
- .to_str()
- .ok_or_else(|| {
- ArklibError::Storage(
- label.to_owned(),
- "Failed to convert file stem to string".to_owned(),
- )
- })?
- .parse::<K>()
- .map_err(|_| {
- ArklibError::Storage(
- label.to_owned(),
- "Failed to parse key from filename".to_owned(),
- )
- })
-}
-
#[cfg(test)]
mod tests {
use crate::{
diff --git a/fs-storage/src/lib.rs b/fs-storage/src/lib.rs
index 83e3868c..28f53603 100644
--- a/fs-storage/src/lib.rs
+++ b/fs-storage/src/lib.rs
@@ -6,7 +6,7 @@ pub mod folder_storage;
pub mod jni;
pub mod monoid;
-mod utils;
+pub mod utils;
pub const ARK_FOLDER: &str = ".ark";
// Should not be lost if possible
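Making `utils` public is what lets the new `fs-cache` crate reuse the shared helpers, as seen in `fs-cache/src/cache.rs` above:

    use fs_storage::utils::extract_key_from_file_path;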
diff --git a/fs-storage/src/utils.rs b/fs-storage/src/utils.rs
index d1e818bd..26cbecec 100644
--- a/fs-storage/src/utils.rs
+++ b/fs-storage/src/utils.rs
@@ -1,5 +1,7 @@
-use data_error::Result;
-use std::{collections::BTreeMap, path::Path};
+use data_error::{ArklibError, Result};
+use serde::Serialize;
+use std::io::Write;
+use std::{collections::BTreeMap, fs::File, path::Path, time::SystemTime};
/// Parses version 2 `FileStorage` format and returns the data as a BTreeMap
///
@@ -51,31 +53,127 @@ where
Ok(data)
}
+/// Writes a serializable value to a file.
+///
+/// This function takes a path, a serializable value, and a timestamp. It writes the value as
+/// pretty-printed JSON, sets the file's modification time to the timestamp, and syncs the file.
+pub fn write_json_file<T: Serialize>(
+ path: &Path,
+ value: &T,
+ time: SystemTime,
+) -> Result<()> {
+ let mut file = File::create(path)?;
+ file.write_all(serde_json::to_string_pretty(value)?.as_bytes())?;
+ file.flush()?;
+
+ file.set_modified(time)?;
+ file.sync_all()?;
+
+ Ok(())
+}
+
+/// Extracts a key of type K from the given file path.
+///
+/// The function can include or exclude the file extension based on the `include_extension` parameter.
+/// It returns a Result containing the parsed key or an error if the extraction or parsing fails.
+pub fn extract_key_from_file_path<K>(
+ label: &str,
+ path: &Path,
+ include_extension: bool,
+) -> Result<K>
+where
+ K: std::str::FromStr,
+{
+ match include_extension {
+ true => path.file_name(), // ("tmp/foo.txt").file_name() -> ("foo.txt")
+ false => path.file_stem(), // ("tmp/foo.txt").file_stem() -> ("foo")
+ }
+ .ok_or_else(|| {
+ ArklibError::Storage(
+ label.to_owned(),
+ "Failed to extract file stem from filename".to_owned(),
+ )
+ })?
+ .to_str()
+ .ok_or_else(|| {
+ ArklibError::Storage(
+ label.to_owned(),
+ "Failed to convert file stem to string".to_owned(),
+ )
+ })?
+ .parse::<K>()
+ .map_err(|_| {
+ ArklibError::Storage(
+ label.to_owned(),
+ "Failed to parse key from filename".to_owned(),
+ )
+ })
+}
+
#[cfg(test)]
mod tests {
use super::*;
+ use serde_json::json;
use std::io::Write;
use tempdir::TempDir;
/// Test reading a legacy version 2 `FileStorage` file
#[test]
fn test_read_legacy_fs() {
- let temp_dir = TempDir::new("ark-rust").unwrap();
+ let temp_dir = TempDir::new("ark-rust")
+ .expect("Failed to create temporary directory");
let file_path = temp_dir.path().join("test_read_legacy_fs");
let file_content = r#"version: 2
key1:1
key2:2
key3:3
"#;
- let mut file = std::fs::File::create(&file_path).unwrap();
- file.write_all(file_content.as_bytes()).unwrap();
+ let mut file = std::fs::File::create(&file_path)
+ .expect("Failed to create test file");
+ file.write_all(file_content.as_bytes())
+ .expect("Failed to write to test file");
// Read the file and check the data
- let data: BTreeMap<String, i32> =
- read_version_2_fs(&file_path).unwrap();
+ let data: BTreeMap<String, i32> = read_version_2_fs(&file_path)
+ .expect("Failed to read version 2 file storage");
assert_eq!(data.len(), 3);
assert_eq!(data.get("key1"), Some(&1));
assert_eq!(data.get("key2"), Some(&2));
assert_eq!(data.get("key3"), Some(&3));
}
+
+ /// Test writing a JSON file
+ #[test]
+ fn test_write_json_file() {
+ let temp_dir = TempDir::new("ark-rust")
+ .expect("Failed to create temporary directory");
+ let file_path = temp_dir.path().join("test_write_json_file.json");
+ let value = json!({"key": "value"});
+
+ write_json_file(&file_path, &value, SystemTime::now())
+ .expect("Failed to write JSON file");
+
+ let written_content = std::fs::read_to_string(&file_path)
+ .expect("Failed to read written JSON file");
+ let expected_content = serde_json::to_string_pretty(&value)
+ .expect("Failed to serialize JSON value");
+ assert_eq!(written_content, expected_content);
+ }
+
+ /// Test extracting a key from a file path
+ #[test]
+ fn test_extract_key_from_file_path() {
+ let path_with_extension = Path::new("tmp/foo.txt");
+ let path_without_extension = Path::new("tmp/foo");
+
+ let key_with_extension: String =
+ extract_key_from_file_path("test", path_with_extension, true)
+ .expect("Failed to extract key with extension");
+ assert_eq!(key_with_extension, "foo.txt");
+
+ let key_without_extension: String =
+ extract_key_from_file_path("test", path_without_extension, false)
+ .expect("Failed to extract key without extension");
+ assert_eq!(key_without_extension, "foo");
+ }
}
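A short sketch combining the two new helpers, mirroring the unit tests above; the directory path is illustrative and must already exist:

    use std::{path::Path, time::SystemTime};
    use fs_storage::utils::{extract_key_from_file_path, write_json_file};

    fn demo() -> data_error::Result<()> {
        let path = Path::new("/tmp/storage/foo.json");
        write_json_file(path, &serde_json::json!({"k": "v"}), SystemTime::now())?;

        // `false` strips the ".json" extension, yielding the storage key "foo".
        let key: String = extract_key_from_file_path("demo", path, false)?;
        assert_eq!(key, "foo");
        Ok(())
    }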