diff --git a/Cargo.toml b/Cargo.toml index eda17fbc..63a2235e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "data-pdf", "data-resource", "fs-atomic-versions", + "fs-cache", "fs-atomic-light", "fs-metadata", "fs-properties", @@ -23,6 +24,7 @@ default-members = [ "data-pdf", "data-resource", "fs-atomic-versions", + "fs-cache", "fs-atomic-light", "fs-metadata", "fs-properties", diff --git a/README.md b/README.md index 8a472b50..4f3dbfd7 100644 --- a/README.md +++ b/README.md @@ -28,18 +28,19 @@ The core crate is `fs-index` which provides us with [content addressing](https:/
-| Package | Description | -| --------------- | ---------------------------------------- | -| `ark-cli` | The CLI tool to interact with ark crates | -| `data-resource` | Resource hashing and ID construction | -| `fs-index` | Resource Index construction and updating | -| `fs-storage` | Filesystem storage for resources | -| `fs-metadata` | Metadata management | -| `fs-properties` | Properties management | -| `data-link` | Linking resources | -| `data-pdf` | PDF handling | -| `data-error` | Error handling | -| `data-json` | JSON serialization and deserialization | +| Package | Description | +| --------------- | ---------------------------------------- | +| `ark-cli` | The CLI tool to interact with ARK crates | +| `data-resource` | Resource hashing and ID construction | +| `fs-cache` | Memory and disk caching with LRU eviction | +| `fs-index` | Resource Index construction and updating | +| `fs-storage` | Key-value storage persisted on filesystem | +| `fs-metadata` | Metadata management | +| `fs-properties` | Properties management | +| `data-link` | Linking resources | +| `data-pdf` | PDF handling | +| `data-error` | Error handling | +| `data-json` | JSON serialization and deserialization |
diff --git a/ark-cli/src/util.rs b/ark-cli/src/util.rs index 40eff290..c5db3536 100644 --- a/ark-cli/src/util.rs +++ b/ark-cli/src/util.rs @@ -179,10 +179,9 @@ pub fn translate_storage( root: &Option, storage: &str, ) -> Option<(PathBuf, Option)> { - if let Ok(path) = PathBuf::from_str(storage) { - if path.exists() && path.is_dir() { - return Some((path, None)); - } + let Ok(path) = PathBuf::from_str(storage); + if path.exists() && path.is_dir() { + return Some((path, None)); } match storage.to_lowercase().as_str() { diff --git a/data-link/src/lib.rs b/data-link/src/lib.rs index ba925722..42de634f 100644 --- a/data-link/src/lib.rs +++ b/data-link/src/lib.rs @@ -136,7 +136,7 @@ impl Link { pub fn get_preview_synced(&self) -> Result { let runtime = tokio::runtime::Runtime::new().expect("Unable to create a runtime"); - return runtime.block_on(self.get_preview()); + runtime.block_on(self.get_preview()) } /// Get OGP metadata of the link. diff --git a/fs-atomic-versions/src/atomic/file.rs b/fs-atomic-versions/src/atomic/file.rs index a2b708ab..4947b77e 100644 --- a/fs-atomic-versions/src/atomic/file.rs +++ b/fs-atomic-versions/src/atomic/file.rs @@ -126,7 +126,7 @@ impl AtomicFile { /// Return the latest version together with vector of the /// files matching this version. Multiple files for the same version - /// can appear due to usage of file syncronization. Different devices + /// can appear due to usage of file synchronization. Different devices /// can create same version simultaneously. 
pub fn latest_version(&self) -> Result<(usize, Vec)> { let files_iterator = fs::read_dir(&self.directory)?.flatten(); diff --git a/fs-cache/Cargo.toml b/fs-cache/Cargo.toml new file mode 100644 index 00000000..543f8a69 --- /dev/null +++ b/fs-cache/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "fs-cache" +version = "0.1.0" +edition = "2021" + +[lib] +name = "fs_cache" +crate-type = ["rlib", "cdylib"] +bench = false + +[dependencies] +log = { version = "0.4.17", features = ["release_max_level_off"] } +data-error = { path = "../data-error" } +fs-storage = { path = "../fs-storage"} +fs-atomic-light = { path = "../fs-atomic-light" } +lru = "0.12.5" + +[dev-dependencies] +tempdir = "0.3.7" diff --git a/fs-cache/src/cache.rs b/fs-cache/src/cache.rs new file mode 100644 index 00000000..3b0824eb --- /dev/null +++ b/fs-cache/src/cache.rs @@ -0,0 +1,761 @@ +use std::fs; +use std::num::NonZeroUsize; +use std::path::{Path, PathBuf}; + +use data_error::{ArklibError, Result}; +use fs_atomic_light::temp_and_move; +use fs_storage::utils::extract_key_from_file_path; +use lru::LruCache; + +/// A cache entry that stores a value and its size in bytes. +/// +/// This structure is used to track both the actual data (value) +/// and its memory usage (size) in the cache. +struct CacheEntry { + value: V, + size: usize, +} + +/// A combined in-memory and disk-based cache system. +/// +/// This cache uses an LRU (Least Recently Used) eviction policy for the +/// in-memory portion and persists data to disk for long-term storage. +pub struct Cache { + /// Label for logging + label: String, + /// Path to the underlying folder where data is persisted + path: PathBuf, + /// An in-memory LRU cache for quick access to frequently used items. + memory_cache: LruCache>, + /// The current memory usage in bytes. + current_memory_bytes: usize, + /// The maximum allowable memory usage in bytes. 
+ max_memory_bytes: usize, +} + +impl Cache +where + K: Ord + + Clone + + std::fmt::Display + + std::hash::Hash + + std::str::FromStr + + std::fmt::Debug, + V: Clone + std::fmt::Debug + AsRef<[u8]> + From>, +{ + /// Creates a new cache instance. + /// + /// # Arguments + /// * `label` - Identifier used in logs + /// * `path` - Directory where cache files are stored + /// * `max_memory_bytes` - Maximum bytes to keep in memory + /// * `preload_cache` - Whether to pre-load the cache from disk on initialization + pub fn new( + label: String, + path: &Path, + max_memory_bytes: usize, + preload_cache: bool, + ) -> Result { + Self::validate_path(path, &label)?; + + let memory_cache = LruCache::new( + NonZeroUsize::new(max_memory_bytes) + .expect("Capacity can't be zero"), + ); + + let mut cache = Self { + label: label.clone(), + path: PathBuf::from(path), + memory_cache, + current_memory_bytes: 0, + max_memory_bytes, + }; + + log::debug!( + "cache/{}: initialized with {} bytes limit", + label, + max_memory_bytes + ); + + if preload_cache { + cache.load_fs()?; + } + Ok(cache) + } + + /// Retrieves a value by its key, checking memory first then disk. + /// Returns None if the key doesn't exist. + pub fn get(&mut self, key: &K) -> Option { + log::debug!("cache/{}: retrieving value for key {}", self.label, key); + + if let Some(v) = self.fetch_from_memory(key) { + log::debug!( + "cache/{}: value for key {} retrieved from memory", + self.label, + key + ); + return Some(v); + } + if let Some(v) = self.fetch_from_disk(key) { + log::debug!( + "cache/{}: value for key {} retrieved from disk", + self.label, + key + ); + return Some(v); + } + + log::warn!("cache/{}: no value found for key {}", self.label, key); + None + } + + /// Stores a new value with the given key. + /// Returns error if the key already exists or if writing fails. 
+ pub fn set(&mut self, key: K, value: V) -> Result<()> { + log::debug!("cache/{}: setting value for key {}", self.label, key); + + // Check if value already exists + if self.exists(&key) { + return Err(ArklibError::Storage( + self.label.clone(), + format!("Key {} already exists in cache", key), + )); + } + + // Always write to disk first + self.persist_to_disk(&key, &value)?; + + // Then update memory cache + self.update_memory_cache(&key, &value)?; + + log::debug!("cache/{}: set key={}", self.label, key); + Ok(()) + } + + /// Checks if a value exists either in memory or on disk. + pub fn exists(&self, key: &K) -> bool { + self.memory_cache.contains(key) + || self.path.join(key.to_string()).exists() + } + + /// Returns an iterator over all cached keys. + /// The keys are returned in filesystem directory order, + /// which should not be relied upon for any specific ordering. + pub fn keys(&self) -> Result> { + let entries = fs::read_dir(&self.path).map_err(|e| { + ArklibError::Storage(self.label.clone(), e.to_string()) + })?; + + let keys: Vec = entries + .filter_map(|entry| match entry { + Ok(entry) => { + let path = entry.path(); + if path.is_file() { + extract_key_from_file_path(&self.label, &path, true) + .ok() + } else { + None + } + } + Err(_) => None, + }) + .collect(); + + Ok(keys.into_iter()) + } + + /// Internal Methods: + /// Initializes the memory cache by loading the most recently modified files up to the memory limit. + /// + /// First collects metadata for all files, sorts them by modification time, and then loads as many + /// recent files as possible within the memory limit. Files that don't fit in memory remain only + /// on disk. + fn load_fs(&mut self) -> Result<()> { + // Collect metadata for all files + let mut file_metadata = Vec::new(); + for entry in fs::read_dir(&self.path)? 
{ + let entry = entry?; + let path = entry.path(); + if path.is_file() { + let key: K = + extract_key_from_file_path(&self.label, &path, true)?; + let metadata = entry.metadata()?; + let modified = metadata.modified()?; + let size = metadata.len() as usize; + file_metadata.push((key, size, modified)); + } + } + + // Sort by modified time (most recent first) + file_metadata.sort_by(|a, b| b.2.cmp(&a.2)); + + // Clear existing cache + self.memory_cache.clear(); + self.current_memory_bytes = 0; + + // Load files that fit in memory + let mut loaded_bytes = 0; + let total_bytes: usize = file_metadata + .iter() + .map(|(_, size, _)| size) + .sum(); + + for (key, size, _) in file_metadata { + if loaded_bytes + size <= self.max_memory_bytes { + match self.read_from_disk(&key) { + Ok(value) => { + self.memory_cache + .put(key, CacheEntry { value, size }); + loaded_bytes += size; + } + Err(err) => { + log::warn!( + "cache/{}: failed to load key={}: {}", + self.label, + key, + err + ); + } + } + } + } + + self.current_memory_bytes = loaded_bytes; + + log::debug!( + "cache/{}: loaded {}/{} bytes in memory", + self.label, + self.current_memory_bytes, + total_bytes + ); + + Ok(()) + } + + /// Validates the provided path. + /// + /// # Arguments + /// * `path` - The path to validate + /// * `label` - Identifier used in logs + fn validate_path(path: &Path, label: &str) -> Result<()> { + if !path.exists() { + return Err(ArklibError::Storage( + label.to_owned(), + "Folder does not exist".to_owned(), + )); + } + + if !path.is_dir() { + return Err(ArklibError::Storage( + label.to_owned(), + "Path is not a directory".to_owned(), + )); + } + + Ok(()) + } + + /// Retrieves a value from the memory cache. + fn fetch_from_memory(&mut self, key: &K) -> Option { + self.memory_cache + .get(key) + .map(|entry| entry.value.clone()) + } + + /// Retrieves a value from disk and caches it in memory if possible. 
+ fn fetch_from_disk(&mut self, key: &K) -> Option { + let file_path = self.path.join(key.to_string()); + if !file_path.exists() { + log::warn!("cache/{}: no value found for key {}", self.label, key); + return None; + } + + match self.read_from_disk(key) { + Ok(value) => { + if let Err(err) = self.update_memory_cache(key, &value) { + log::error!( + "cache/{}: failed to add to memory cache for key {}: {}", + self.label, + key, + err + ); + return None; + } + Some(value) + } + Err(err) => { + log::error!( + "cache/{}: failed to load from disk for key {}: {}", + self.label, + key, + err + ); + None + } + } + } + + /// Writes a value to disk using atomic operations. + fn persist_to_disk(&mut self, key: &K, value: &V) -> Result<()> { + log::debug!("cache/{}: writing to disk for key {}", self.label, key); + + if !self.path.exists() { + return Err(ArklibError::Storage( + self.label.clone(), + format!( + "Cache directory does not exist: {}", + self.path.display() + ), + )); + } + + let file_path = self.path.join(key.to_string()); + debug_assert!( + !file_path.exists(), + "File {} should not exist before writing", + file_path.display() + ); + + temp_and_move(value.as_ref(), &self.path, &key.to_string()).map_err( + |err| { + ArklibError::Storage( + self.label.clone(), + format!("Failed to write value for key {}: {}", key, err), + ) + }, + ) + } + + /// Reads a value from disk. + fn read_from_disk(&self, key: &K) -> Result + where + V: From>, // Add trait bound for reading binary data + { + let file_path = self.path.join(key.to_string()); + let contents = fs::read(&file_path)?; + Ok(V::from(contents)) + } + + /// Returns the size of a value in bytes. + /// + /// First checks the memory cache for size information, + /// falls back to checking the file size on disk if not found in memory. 
+ fn get_file_size(&self, key: &K) -> Result { + if let Some(entry) = self.memory_cache.peek(key) { + return Ok(entry.size); + } + Ok(fs::metadata(self.path.join(key.to_string()))?.len() as usize) + } + + /// Adds or updates a value in the memory cache, evicting old entries if needed. + /// Logs error and skips caching if the value is larger than the maximum memory limit. + fn update_memory_cache(&mut self, key: &K, value: &V) -> Result<()> { + log::debug!("cache/{}: caching in memory for key {}", self.label, key); + let size = self.get_file_size(key)?; + + // If single value is larger than total limit, just skip memory caching + if size > self.max_memory_bytes { + log::error!( + "cache/{}: value size {} exceeds limit {}", + self.label, + size, + self.max_memory_bytes + ); + return Ok(()); + } + + // Remove oldest entries until we have space for new value + while self.current_memory_bytes + size > self.max_memory_bytes { + if let Some((_, old_entry)) = self.memory_cache.pop_lru() { + if self.current_memory_bytes < old_entry.size { + log::error!( + "cache/{}: Memory tracking inconsistency - current: {}, entry size: {}", + self.label, + self.current_memory_bytes, + old_entry.size + ); + self.current_memory_bytes = 0; + } else { + self.current_memory_bytes -= old_entry.size; + } + } + } + + // Add new value and update size + if let Some((evicted_key, _)) = self.memory_cache.push( + key.clone(), + CacheEntry { + value: value.clone(), + size, + }, + ) { + log::debug!( + "cache/{}: evicted key {} from memory cache", + self.label, + evicted_key + ); + } + + self.current_memory_bytes += size; + + log::debug!( + "cache/{}: added {} bytes, total {}/{}", + self.label, + size, + self.current_memory_bytes, + self.max_memory_bytes + ); + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::{ + fs::File, + io::Write, + time::{Duration, SystemTime}, + }; + use tempdir::TempDir; + + /// Helper function to create a temporary directory + fn create_temp_dir() -> 
TempDir { + TempDir::new("tmp").expect("Failed to create temporary directory") + } + + /// Helper function to create a test cache with default settings + fn create_test_cache(temp_dir: &TempDir) -> Cache> { + Cache::new( + "test".to_string(), + temp_dir.path(), + 1024 * 1024, // 1MB + true, // Enable preloading by default + ) + .expect("Failed to create cache") + } + + #[test] + fn test_new_cache() { + let temp_dir = create_temp_dir(); + let cache = create_test_cache(&temp_dir); + assert_eq!(cache.current_memory_bytes, 0); + assert_eq!(cache.max_memory_bytes, 1024 * 1024); + } + + #[test] + fn test_set_and_get() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); + let key = "test_key".to_string(); + let value = vec![1, 2, 3, 4]; + + cache + .set(key.clone(), value.clone()) + .expect("Failed to set value"); + let retrieved = cache.get(&key).expect("Failed to get value"); + assert_eq!(retrieved, value); + } + + #[test] + fn test_exists() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); + let key = "test_key".to_string(); + let value = vec![1, 2, 3, 4]; + + assert!(!cache.exists(&key)); + cache + .set(key.clone(), value) + .expect("Failed to set value"); + assert!(cache.exists(&key)); + } + + #[test] + fn test_get_nonexistent() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); + assert!(cache.get(&"nonexistent".to_string()).is_none()); + } + + #[test] + fn test_keys() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); + let values = vec![ + ("key1".to_string(), vec![1, 2]), + ("key2".to_string(), vec![3, 4]), + ("key3".to_string(), vec![5, 6]), + ]; + + // Add values + for (key, data) in values.iter() { + cache + .set(key.clone(), data.clone()) + .expect("Failed to set value"); + } + + // Check keys + let mut cache_keys: Vec<_> = cache + .keys() + .expect("Failed to get keys") + .collect(); + cache_keys.sort(); + let mut 
expected_keys: Vec<_> = + values.iter().map(|(k, _)| k.clone()).collect(); + expected_keys.sort(); + + assert_eq!(cache_keys, expected_keys); + } + + #[test] + fn test_memory_eviction() { + let temp_dir = create_temp_dir(); + let mut cache = Cache::new( + "test".to_string(), + temp_dir.path(), + 8, // Very small limit to force eviction + true, // Enable preloading by default + ) + .expect("Failed to create cache"); + + // Add first value + let key1 = "key1.txt".to_string(); + let value1 = vec![1, 2, 3, 4, 5, 7]; + cache + .set(key1.clone(), value1.clone()) + .expect("Failed to set value1"); + + // Add second value to trigger eviction + let key2 = "key2.json".to_string(); + let value2 = vec![5, 6, 8]; + cache + .set(key2.clone(), value2.clone()) + .expect("Failed to set value2"); + + // First value should be evicted from memory but still on disk + assert!(cache.memory_cache.get(&key1).is_none()); + assert_eq!(cache.get(&key1).unwrap(), value1); // Should load from disk + } + + #[test] + fn test_large_value_handling() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); + let key = "large_key".to_string(); + let large_value = vec![0; 2 * 1024 * 1024]; // 2MB, larger than cache + + // Should fail to cache in memory but succeed in writing to disk + assert!(cache + .set(key.clone(), large_value.clone()) + .is_ok()); + assert_eq!(cache.get(&key).unwrap(), large_value); // Should load from disk + } + + #[test] + fn test_persistence() { + let temp_dir = create_temp_dir(); + let key = "persist_key".to_string(); + let value = vec![1, 2, 3, 4]; + + // Scope for first cache instance + { + let mut cache = + Cache::new("test".to_string(), temp_dir.path(), 1024, true) + .expect("Failed to create first cache"); + cache + .set(key.clone(), value.clone()) + .expect("Failed to set value"); + } + + // Create new cache instance pointing to same directory + let mut cache2 = + Cache::new("test".to_string(), temp_dir.path(), 1024, true) + .expect("Failed 
to create second cache"); + + // Should be able to read value written by first instance + let retrieved: Vec = cache2.get(&key).expect("Failed to get value"); + assert_eq!(retrieved, value); + } + + #[test] + fn test_concurrent_reads() { + use std::thread; + + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); + let key = "test_key".to_string(); + let value = vec![1, 2, 3, 4]; + + // Set up initial cache with data + cache + .set(key.clone(), value.clone()) + .expect("Failed to set value"); + + // Create multiple reader caches + let mut handles: Vec>>> = vec![]; + for _ in 0..3 { + let key = key.clone(); + let cache_path = temp_dir.path().to_path_buf(); + + handles.push(thread::spawn(move || { + let mut reader_cache = + Cache::new("test".to_string(), &cache_path, 1024, true) + .expect("Failed to create reader cache"); + + reader_cache.get(&key) + })); + } + + // All readers should get the same value + for handle in handles { + let result = handle.join().expect("Thread panicked"); + assert_eq!(result.unwrap(), value); + } + } + + #[test] + fn test_duplicate_set() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); + let key = "dup_key".to_string(); + let value1 = vec![1, 2, 3, 4]; + let value2 = vec![5, 6, 7, 8]; + + // First set + cache + .set(key.clone(), value1.clone()) + .expect("Failed to set first value"); + + // Second set with same key should return an error + assert!(cache.set(key.clone(), value2).is_err()); + + // Should still have first value + let retrieved = cache.get(&key).expect("Failed to get value"); + assert_eq!(retrieved, value1); + } + + #[test] + fn test_loads_recent_files_first() { + let temp_dir = create_temp_dir(); + let mut cache: Cache> = Cache::new( + "test".to_string(), + temp_dir.path(), + 4, // Small limit to force selection + true, // Enable preloading by default + ) + .expect("Failed to create cache"); + + // Create files with different timestamps + let files = vec![ + (
"old.txt", + vec![1, 2, 3], + SystemTime::now() - Duration::from_secs(100), + ), + ("new.txt", vec![3, 4], SystemTime::now()), + ]; + + for (name, data, time) in files { + let path = temp_dir.path().join(name); + let mut file = File::create(path).unwrap(); + file.write_all(&data).unwrap(); + file.set_modified(time).unwrap(); + file.sync_all().unwrap(); + } + + // Reload cache + cache.load_fs().expect("Failed to load files"); + + // Verify newer file is in memory + assert!(cache + .memory_cache + .contains(&"new.txt".to_string())); + assert!(!cache + .memory_cache + .contains(&"old.txt".to_string())); + } + + #[test] + #[should_panic(expected = "Capacity can't be zero")] + fn test_zero_capacity() { + let temp_dir = create_temp_dir(); + let _cache: std::result::Result>, ArklibError> = + Cache::new("test".to_string(), temp_dir.path(), 0, true); + } + + #[test] + fn test_memory_tracking() { + let temp_dir = create_temp_dir(); + let mut cache = create_test_cache(&temp_dir); + let key = "track_key".to_string(); + let value = vec![1, 2, 3, 4]; // 4 bytes + + cache + .set(key.clone(), value) + .expect("Failed to set value"); + + // Memory usage should match file size + assert_eq!(cache.current_memory_bytes, 4); + } + + #[test] + fn test_preload_behavior() { + let temp_dir = create_temp_dir(); + + // First create and populate a cache + let mut initial_cache = Cache::new( + "test".to_string(), + temp_dir.path(), + 1024, + false, // Don't preload yet + ) + .expect("Failed to create initial cache"); + + // Add some test data + let test_data = vec![ + ("key1".to_string(), vec![1, 2, 3]), + ("key2".to_string(), vec![4, 5, 6]), + ]; + + for (key, value) in &test_data { + initial_cache + .set(key.clone(), value.clone()) + .expect("Failed to set value"); + } + + // Drop the initial cache + drop(initial_cache); + + // Create new cache instances with different preload settings + let mut no_preload_cache: Cache> = Cache::new( + "test".to_string(), + temp_dir.path(), + 1024, + false, 
// Don't preload + ) + .expect("Failed to create no-preload cache"); + + let mut preload_cache: Cache> = Cache::new( + "test".to_string(), + temp_dir.path(), + 1024, + true, // Do preload + ) + .expect("Failed to create preload cache"); + + // Check memory cache state + for (key, _) in &test_data { + // No-preload cache should not have items in memory + assert!(no_preload_cache.memory_cache.get(key).is_none()); + + // Preload cache should have items in memory + assert!(preload_cache.memory_cache.get(key).is_some()); + + // Both should still be able to get the values + assert!(no_preload_cache.get(key).is_some()); + assert!(preload_cache.get(key).is_some()); + } + } +} diff --git a/fs-cache/src/lib.rs b/fs-cache/src/lib.rs new file mode 100644 index 00000000..acd91ad5 --- /dev/null +++ b/fs-cache/src/lib.rs @@ -0,0 +1,3 @@ +mod cache; + +pub use cache::Cache; diff --git a/fs-index/src/index.rs b/fs-index/src/index.rs index 6746ac29..e66ef917 100644 --- a/fs-index/src/index.rs +++ b/fs-index/src/index.rs @@ -75,7 +75,6 @@ type IndexedPaths = HashSet>; /// #### Reactive API /// - [`ResourceIndex::update_all`]: Method to update the index by rescanning /// files and returning changes (additions/deletions/updates). 
- /// /// #### Snapshot API /// - [`ResourceIndex::get_resources_by_id`]: Query resources from the index by diff --git a/fs-storage/src/file_storage.rs b/fs-storage/src/file_storage.rs index 2e3332e9..f4676984 100644 --- a/fs-storage/src/file_storage.rs +++ b/fs-storage/src/file_storage.rs @@ -1,8 +1,7 @@ use serde::{Deserialize, Serialize}; use std::{ collections::BTreeMap, - fs::{self, File}, - io::Write, + fs, path::{Path, PathBuf}, time::SystemTime, }; @@ -10,7 +9,7 @@ use std::{ use crate::{ base_storage::{BaseStorage, SyncStatus}, monoid::Monoid, - utils::read_version_2_fs, + utils::{read_version_2_fs, write_json_file}, }; use data_error::{ArklibError, Result}; @@ -262,13 +261,9 @@ where ) })?; fs::create_dir_all(parent_dir)?; - let mut file = File::create(&self.path)?; - file.write_all(serde_json::to_string_pretty(&self.data)?.as_bytes())?; - file.flush()?; let new_timestamp = SystemTime::now(); - file.set_modified(new_timestamp)?; - file.sync_all()?; + write_json_file(&self.path, &self.data, new_timestamp)?; self.modified = new_timestamp; self.written_to_disk = new_timestamp; diff --git a/fs-storage/src/folder_storage.rs b/fs-storage/src/folder_storage.rs index 68892a49..e1d147d2 100644 --- a/fs-storage/src/folder_storage.rs +++ b/fs-storage/src/folder_storage.rs @@ -1,6 +1,5 @@ use std::collections::BTreeSet; use std::fs::{self, File}; -use std::io::Write; use std::time::SystemTime; use std::{ collections::BTreeMap, @@ -9,6 +8,7 @@ use std::{ use crate::base_storage::{BaseStorage, SyncStatus}; use crate::monoid::Monoid; +use crate::utils::{extract_key_from_file_path, write_json_file}; use data_error::{ArklibError, Result}; /// Represents a folder storage system that persists data to disk. 
@@ -92,7 +92,8 @@ where .extension() .map_or(false, |ext| ext == "json") { - let key: K = extract_key_from_file_path(&self.label, &path)?; + let key: K = + extract_key_from_file_path(&self.label, &path, false)?; let file = File::open(&path)?; let value: V = serde_json::from_reader(file).map_err(|err| { @@ -280,7 +281,8 @@ where .extension() .map_or(false, |ext| ext == "json") { - let key = extract_key_from_file_path(&self.label, &path)?; + let key = + extract_key_from_file_path(&self.label, &path, false)?; if !self.data.contains_key(&key) && !self.deleted_keys.contains(&key) { @@ -348,13 +350,9 @@ where for (key, value) in &self.data { let file_path = self.path.join(format!("{}.json", key)); - let mut file = File::create(&file_path)?; - file.write_all(serde_json::to_string_pretty(&value)?.as_bytes())?; - file.flush()?; let new_timestamp = SystemTime::now(); - file.set_modified(new_timestamp)?; - file.sync_all()?; + write_json_file(&file_path, &value, new_timestamp)?; self.timestamps .insert(key.clone(), (new_timestamp, new_timestamp)); @@ -414,33 +412,6 @@ where } } -fn extract_key_from_file_path(label: &str, path: &Path) -> Result -where - K: std::str::FromStr, -{ - path.file_stem() - .ok_or_else(|| { - ArklibError::Storage( - label.to_owned(), - "Failed to extract file stem from filename".to_owned(), - ) - })? - .to_str() - .ok_or_else(|| { - ArklibError::Storage( - label.to_owned(), - "Failed to convert file stem to string".to_owned(), - ) - })? 
- .parse::() - .map_err(|_| { - ArklibError::Storage( - label.to_owned(), - "Failed to parse key from filename".to_owned(), - ) - }) -} - #[cfg(test)] mod tests { use crate::{ diff --git a/fs-storage/src/lib.rs b/fs-storage/src/lib.rs index 83e3868c..28f53603 100644 --- a/fs-storage/src/lib.rs +++ b/fs-storage/src/lib.rs @@ -6,7 +6,7 @@ pub mod folder_storage; pub mod jni; pub mod monoid; -mod utils; +pub mod utils; pub const ARK_FOLDER: &str = ".ark"; // Should not be lost if possible diff --git a/fs-storage/src/utils.rs b/fs-storage/src/utils.rs index d1e818bd..26cbecec 100644 --- a/fs-storage/src/utils.rs +++ b/fs-storage/src/utils.rs @@ -1,5 +1,7 @@ -use data_error::Result; -use std::{collections::BTreeMap, path::Path}; +use data_error::{ArklibError, Result}; +use serde::Serialize; +use std::io::Write; +use std::{collections::BTreeMap, fs::File, path::Path, time::SystemTime}; /// Parses version 2 `FileStorage` format and returns the data as a BTreeMap /// @@ -51,31 +53,127 @@ where Ok(data) } +/// Writes a serializable value to a file. +/// +/// This function takes a path, a serializable value, and a timestamp. It writes the value to the specified +/// file in a pretty JSON format. The function ensures that the file is flushed and synced after writing. +pub fn write_json_file( + path: &Path, + value: &T, + time: SystemTime, +) -> Result<()> { + let mut file = File::create(path)?; + file.write_all(serde_json::to_string_pretty(value)?.as_bytes())?; + file.flush()?; + + file.set_modified(time)?; + file.sync_all()?; + + Ok(()) +} + +/// Extracts a key of type K from the given file path. +/// +/// The function can include or exclude the file extension based on the `include_extension` parameter. +/// It returns a Result containing the parsed key or an error if the extraction or parsing fails. 
+pub fn extract_key_from_file_path( + label: &str, + path: &Path, + include_extension: bool, +) -> Result +where + K: std::str::FromStr, +{ + match include_extension { + true => path.file_name(), // ("tmp/foo.txt").file_name() -> ("foo.txt") + false => path.file_stem(), // ("tmp/foo.txt").file_stem() -> ("foo") + } + .ok_or_else(|| { + ArklibError::Storage( + label.to_owned(), + "Failed to extract file stem from filename".to_owned(), + ) + })? + .to_str() + .ok_or_else(|| { + ArklibError::Storage( + label.to_owned(), + "Failed to convert file stem to string".to_owned(), + ) + })? + .parse::() + .map_err(|_| { + ArklibError::Storage( + label.to_owned(), + "Failed to parse key from filename".to_owned(), + ) + }) +} + #[cfg(test)] mod tests { use super::*; + use serde_json::json; use std::io::Write; use tempdir::TempDir; /// Test reading a legacy version 2 `FileStorage` file #[test] fn test_read_legacy_fs() { - let temp_dir = TempDir::new("ark-rust").unwrap(); + let temp_dir = TempDir::new("ark-rust") + .expect("Failed to create temporary directory"); let file_path = temp_dir.path().join("test_read_legacy_fs"); let file_content = r#"version: 2 key1:1 key2:2 key3:3 "#; - let mut file = std::fs::File::create(&file_path).unwrap(); - file.write_all(file_content.as_bytes()).unwrap(); + let mut file = std::fs::File::create(&file_path) + .expect("Failed to create test file"); + file.write_all(file_content.as_bytes()) + .expect("Failed to write to test file"); // Read the file and check the data - let data: BTreeMap = - read_version_2_fs(&file_path).unwrap(); + let data: BTreeMap = read_version_2_fs(&file_path) + .expect("Failed to read version 2 file storage"); assert_eq!(data.len(), 3); assert_eq!(data.get("key1"), Some(&1)); assert_eq!(data.get("key2"), Some(&2)); assert_eq!(data.get("key3"), Some(&3)); } + + /// Test writing a JSON file + #[test] + fn test_write_json_file() { + let temp_dir = TempDir::new("ark-rust") + .expect("Failed to create temporary directory"); + 
let file_path = temp_dir.path().join("test_write_json_file.json"); + let value = json!({"key": "value"}); + + write_json_file(&file_path, &value, SystemTime::now()) + .expect("Failed to write JSON file"); + + let written_content = std::fs::read_to_string(&file_path) + .expect("Failed to read written JSON file"); + let expected_content = serde_json::to_string_pretty(&value) + .expect("Failed to serialize JSON value"); + assert_eq!(written_content, expected_content); + } + + /// Test extracting a key from a file path + #[test] + fn test_extract_key_from_file_path() { + let path_with_extension = Path::new("tmp/foo.txt"); + let path_without_extension = Path::new("tmp/foo"); + + let key_with_extension: String = + extract_key_from_file_path("test", path_with_extension, true) + .expect("Failed to extract key with extension"); + assert_eq!(key_with_extension, "foo.txt"); + + let key_without_extension: String = + extract_key_from_file_path("test", path_without_extension, false) + .expect("Failed to extract key without extension"); + assert_eq!(key_without_extension, "foo"); + } }