From 4a730440ba4490223291a22caf5de46fe5daf47c Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 6 Feb 2024 09:30:03 +1100 Subject: [PATCH] Add a global configuration `config::Config` with `validate_checksums` option --- CHANGELOG.md | 4 + Cargo.toml | 2 +- .../bytes_to_bytes/crc32c/crc32c_codec.rs | 15 ++-- src/config.rs | 78 +++++++++++++++++++ src/lib.rs | 1 + 5 files changed, 92 insertions(+), 8 deletions(-) create mode 100644 src/config.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 36c391f6..f1d59085 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + - Add a global configuration `config::Config` accessible via `config::{global_config,global_config_mut}` + - Currently it exposes a single configuration option: `validate_checksums` (default: `true`) + ## [0.11.5] - 2024-02-05 ### Fixed diff --git a/Cargo.toml b/Cargo.toml index e47ee3e5..539fee1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "zarrs" -version = "0.11.5" +version = "0.11.6" authors = ["Lachlan Deakin "] edition = "2021" rust-version = "1.71" diff --git a/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs b/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs index 35f82399..fc5f1643 100644 --- a/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs +++ b/src/array/codec/bytes_to_bytes/crc32c/crc32c_codec.rs @@ -67,14 +67,15 @@ impl BytesToBytesCodecTraits for Crc32cCodec { _parallel: bool, ) -> Result<Vec<u8>, CodecError> { if encoded_value.len() >= CHECKSUM_SIZE { - let decoded_value = &encoded_value[..encoded_value.len() - CHECKSUM_SIZE]; - let checksum = crc32c::crc32c(decoded_value).to_le_bytes(); - if checksum == encoded_value[encoded_value.len() - CHECKSUM_SIZE..] 
{ - encoded_value.resize_with(encoded_value.len() - CHECKSUM_SIZE, Default::default); - Ok(encoded_value) - } else { - Err(CodecError::InvalidChecksum) + if crate::config::global_config().validate_checksums() { + let decoded_value = &encoded_value[..encoded_value.len() - CHECKSUM_SIZE]; + let checksum = crc32c::crc32c(decoded_value).to_le_bytes(); + if checksum != encoded_value[encoded_value.len() - CHECKSUM_SIZE..] { + return Err(CodecError::InvalidChecksum); + } } + encoded_value.resize_with(encoded_value.len() - CHECKSUM_SIZE, Default::default); + Ok(encoded_value) } else { Err(CodecError::Other( "CRC32C checksum decoder expects a 32 bit input".to_string(), diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 00000000..9e089460 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,78 @@ +//! Zarrs global configuration options. + +use std::sync::{OnceLock, RwLock, RwLockReadGuard, RwLockWriteGuard}; + +/// Global configuration options for the zarrs crate. +/// +/// Retrieve the global [`Config`] with [`global_config`] and modify it with [`global_config_mut`]. +/// +/// ## Configuration Options +/// +/// ### Validate Checksums +/// > default: [`true`] +/// +/// If enabled, checksum codecs (e.g. `crc32c`) will validate that encoded data matches stored checksums, otherwise validation is skipped. +/// Note that regardless of this configuration option, checksum codecs may skip validation when partial decoding. +#[derive(Debug)] +pub struct Config { + validate_checksums: bool, +} + +#[allow(clippy::derivable_impls)] +impl Default for Config { + fn default() -> Self { + Config { + validate_checksums: true, + } + } +} + +impl Config { + /// Get the [validate checksums](#validate-checksums) configuration. + #[must_use] + pub fn validate_checksums(&self) -> bool { + self.validate_checksums + } + + /// Set the [validate checksums](#validate-checksums) configuration. 
+ pub fn set_validate_checksums(&mut self, validate_checksums: bool) { + self.validate_checksums = validate_checksums; + } +} + +static CONFIG: OnceLock<RwLock<Config>> = OnceLock::new(); + +/// Returns a reference to the global zarrs configuration. +/// +/// # Panics +/// This function panics if the underlying lock has been poisoned and might panic if the global config is already held by the current thread. +pub fn global_config() -> RwLockReadGuard<'static, Config> { + CONFIG + .get_or_init(|| RwLock::new(Config::default())) + .read() + .unwrap() +} + +/// Returns a mutable reference to the global zarrs configuration. +/// +/// # Panics +/// This function panics if the underlying lock has been poisoned and might panic if the global config is already held by the current thread. +pub fn global_config_mut() -> RwLockWriteGuard<'static, Config> { + CONFIG + .get_or_init(|| RwLock::new(Config::default())) + .write() + .unwrap() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn config_validate_checksums() { + assert!(global_config().validate_checksums()); + global_config_mut().set_validate_checksums(false); + assert!(!global_config().validate_checksums()); + global_config_mut().set_validate_checksums(true); + } +} diff --git a/src/lib.rs b/src/lib.rs index 952ad77b..5033e96c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -87,6 +87,7 @@ pub mod array; pub mod array_subset; pub mod byte_range; +pub mod config; pub mod group; pub mod metadata; pub mod node;