diff --git a/zarrs_metadata/CHANGELOG.md b/zarrs_metadata/CHANGELOG.md index 81f3923f..06bd0a1d 100644 --- a/zarrs_metadata/CHANGELOG.md +++ b/zarrs_metadata/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Derive `Copy` for `ArrayMetadataV2Order` +- Add `codec_metadata_v2_to_v3` + ## [0.3.1] - 2025-01-29 ### Fixed diff --git a/zarrs_metadata/src/v2/array.rs b/zarrs_metadata/src/v2/array.rs index aedebbfa..c679e5fb 100644 --- a/zarrs_metadata/src/v2/array.rs +++ b/zarrs_metadata/src/v2/array.rs @@ -296,7 +296,7 @@ impl Serialize for FillValueMetadataV2 { } /// The layout of bytes within each chunk of the array. -#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, Debug)] +#[derive(Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Debug)] pub enum ArrayMetadataV2Order { /// Row-major order. The last dimension varies fastest. C, diff --git a/zarrs_metadata/src/v2_to_v3.rs b/zarrs_metadata/src/v2_to_v3.rs index b922b91a..58892540 100644 --- a/zarrs_metadata/src/v2_to_v3.rs +++ b/zarrs_metadata/src/v2_to_v3.rs @@ -11,7 +11,7 @@ use crate::{ data_type_metadata_v2_to_endianness, ArrayMetadataV2Order, DataTypeMetadataV2, DataTypeMetadataV2InvalidEndiannessError, FillValueMetadataV2, }, - ArrayMetadataV2, GroupMetadataV2, + ArrayMetadataV2, GroupMetadataV2, MetadataV2, }, v3::{ array::{ @@ -25,6 +25,7 @@ use crate::{ }, ArrayMetadataV3, GroupMetadataV3, MetadataV3, }, + Endianness, }; use super::v3::array::data_type::DataTypeMetadataV3; @@ -61,80 +62,27 @@ pub enum ArrayMetadataV2ToV3ConversionError { Other(String), } -/// Convert Zarr V2 array metadata to V3. +/// Convert Zarr V2 codec metadata to the equivalent Zarr V3 codec metadata. /// /// # Errors /// Returns a [`ArrayMetadataV2ToV3ConversionError`] if the metadata is invalid or is not compatible with Zarr V3 metadata. -#[allow(clippy::too_many_lines)] -pub fn array_metadata_v2_to_v3( - array_metadata_v2: &ArrayMetadataV2, -) -> Result { - let shape = array_metadata_v2.shape.clone(); - let chunk_grid = MetadataV3::new_with_serializable_configuration( - crate::v3::array::chunk_grid::regular::IDENTIFIER, - &RegularChunkGridConfiguration { - chunk_shape: array_metadata_v2.chunks.clone(), - }, - )?; - - let (Ok(data_type), endianness) = ( - data_type_metadata_v2_to_v3_data_type(&array_metadata_v2.dtype), - data_type_metadata_v2_to_endianness(&array_metadata_v2.dtype) - .map_err(ArrayMetadataV2ToV3ConversionError::InvalidEndianness)?, - ) else { - return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType( - match &array_metadata_v2.dtype { - DataTypeMetadataV2::Simple(dtype) => dtype.clone(), - DataTypeMetadataV2::Structured(dtype) => { - return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType( - format!("{dtype:?}"), - )) - } - }, - )); - }; - - // Fill value - let mut fill_value = array_metadata_fill_value_v2_to_v3(&array_metadata_v2.fill_value) - .or_else(|| { - // Support zarr-python encoded string arrays with a `null` fill value - match data_type.name().as_str() { - "string" => Some(FillValueMetadataV3::String(String::new())), - _ => None, - } - }) - .ok_or_else(|| { - // TODO: How best to deal with null fill values? What do other implementations do? - ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue( - data_type.to_string(), - array_metadata_v2.fill_value.clone(), - ) - })?; - if data_type.name() == "bool" { - // Map a 0/1 scalar fill value to a bool - if let Some(fill_value_uint) = fill_value.try_as_uint::() { - if fill_value_uint == 0 { - fill_value = FillValueMetadataV3::Bool(false); - } else if fill_value_uint == 1 { - fill_value = FillValueMetadataV3::Bool(true); - } else { - return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue( - data_type.to_string(), - array_metadata_v2.fill_value.clone(), - )); - } - } - } - +pub fn codec_metadata_v2_to_v3( + order: ArrayMetadataV2Order, + dimensionality: usize, + data_type: &DataTypeMetadataV3, + endianness: Option, + filters: &Option>, + compressor: &Option, +) -> Result, ArrayMetadataV2ToV3ConversionError> { let mut codecs: Vec = vec![]; // Array-to-array codecs - if array_metadata_v2.order == ArrayMetadataV2Order::F { + if order == ArrayMetadataV2Order::F { let transpose_metadata = MetadataV3::new_with_serializable_configuration( crate::v3::array::codec::transpose::IDENTIFIER, &TransposeCodecConfigurationV1 { order: { - let f_order: Vec = (0..array_metadata_v2.shape.len()).rev().collect(); + let f_order: Vec = (0..dimensionality).rev().collect(); unsafe { // SAFETY: f_order is valid TransposeOrder::new(&f_order).unwrap_unchecked() @@ -147,7 +95,7 @@ pub fn array_metadata_v2_to_v3( // Filters (array to array or array to bytes codecs) let mut has_array_to_bytes = false; - if let Some(filters) = &array_metadata_v2.filters { + if let Some(filters) = filters { for filter in filters { // TODO: Add a V2 registry with V2 to V3 conversion functions match filter.id() { @@ -170,7 +118,7 @@ pub fn array_metadata_v2_to_v3( } // Compressor (array to bytes codec) - if let Some(compressor) = &array_metadata_v2.compressor { + if let Some(compressor) = compressor { #[allow(clippy::single_match)] match compressor.id() { crate::v2::array::codec::zfpy::IDENTIFIER => { @@ -206,7 +154,7 @@ pub fn array_metadata_v2_to_v3( } // Compressor (bytes to bytes codec) - if let Some(compressor) = &array_metadata_v2.compressor { + if let Some(compressor) = compressor { match compressor.id() { crate::v2::array::codec::zfpy::IDENTIFIER | crate::v3::array::codec::pcodec::IDENTIFIER => { @@ -216,7 +164,7 @@ pub fn array_metadata_v2_to_v3( let blosc = serde_json::from_value::( serde_json::to_value(compressor.configuration())?, )?; - let configuration = codec_blosc_v2_numcodecs_to_v3(&blosc, &data_type); + let configuration = codec_blosc_v2_numcodecs_to_v3(&blosc, data_type); codecs.push(MetadataV3::new_with_serializable_configuration( crate::v3::array::codec::blosc::IDENTIFIER, &configuration, @@ -239,6 +187,83 @@ pub fn array_metadata_v2_to_v3( }; } + Ok(codecs) +} + +/// Convert Zarr V2 array metadata to V3. +/// +/// # Errors +/// Returns a [`ArrayMetadataV2ToV3ConversionError`] if the metadata is invalid or is not compatible with Zarr V3 metadata. +#[allow(clippy::too_many_lines)] +pub fn array_metadata_v2_to_v3( + array_metadata_v2: &ArrayMetadataV2, +) -> Result { + let shape = array_metadata_v2.shape.clone(); + let chunk_grid = MetadataV3::new_with_serializable_configuration( + crate::v3::array::chunk_grid::regular::IDENTIFIER, + &RegularChunkGridConfiguration { + chunk_shape: array_metadata_v2.chunks.clone(), + }, + )?; + + let (Ok(data_type), endianness) = ( + data_type_metadata_v2_to_v3_data_type(&array_metadata_v2.dtype), + data_type_metadata_v2_to_endianness(&array_metadata_v2.dtype) + .map_err(ArrayMetadataV2ToV3ConversionError::InvalidEndianness)?, + ) else { + return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType( + match &array_metadata_v2.dtype { + DataTypeMetadataV2::Simple(dtype) => dtype.clone(), + DataTypeMetadataV2::Structured(dtype) => { + return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType( + format!("{dtype:?}"), + )) + } + }, + )); + }; + + // Fill value + let mut fill_value = array_metadata_fill_value_v2_to_v3(&array_metadata_v2.fill_value) + .or_else(|| { + // Support zarr-python encoded string arrays with a `null` fill value + match data_type.name().as_str() { + "string" => Some(FillValueMetadataV3::String(String::new())), + _ => None, + } + }) + .ok_or_else(|| { + // TODO: How best to deal with null fill values? What do other implementations do? + ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue( + data_type.to_string(), + array_metadata_v2.fill_value.clone(), + ) + })?; + if data_type.name() == "bool" { + // Map a 0/1 scalar fill value to a bool + if let Some(fill_value_uint) = fill_value.try_as_uint::() { + if fill_value_uint == 0 { + fill_value = FillValueMetadataV3::Bool(false); + } else if fill_value_uint == 1 { + fill_value = FillValueMetadataV3::Bool(true); + } else { + return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue( + data_type.to_string(), + array_metadata_v2.fill_value.clone(), + )); + } + } + } + + let codecs = codec_metadata_v2_to_v3( + array_metadata_v2.order, + array_metadata_v2.shape.len(), + &data_type, + endianness, + &array_metadata_v2.filters, + &array_metadata_v2.compressor, + )?; + let chunk_key_encoding = MetadataV3::new_with_serializable_configuration( crate::v3::array::chunk_key_encoding::v2::IDENTIFIER, &V2ChunkKeyEncodingConfiguration {