Skip to content

Commit

Permalink
feat(metadata): add codec_metadata_v2_to_v3
Browse files Browse the repository at this point in the history
  • Loading branch information
LDeakin committed Feb 3, 2025
1 parent 9070e12 commit ee0e0f6
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 70 deletions.
4 changes: 4 additions & 0 deletions zarrs_metadata/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Derive `Copy` for `ArrayMetadataV2Order`
- Add `codec_metadata_v2_to_v3` to convert Zarr V2 codec metadata to the equivalent Zarr V3 codec metadata

## [0.3.1] - 2025-01-29

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion zarrs_metadata/src/v2/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ impl Serialize for FillValueMetadataV2 {
}

/// The layout of bytes within each chunk of the array.
#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, Debug)]
#[derive(Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Debug)]
pub enum ArrayMetadataV2Order {
/// Row-major order. The last dimension varies fastest.
C,
Expand Down
163 changes: 94 additions & 69 deletions zarrs_metadata/src/v2_to_v3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::{
data_type_metadata_v2_to_endianness, ArrayMetadataV2Order, DataTypeMetadataV2,
DataTypeMetadataV2InvalidEndiannessError, FillValueMetadataV2,
},
ArrayMetadataV2, GroupMetadataV2,
ArrayMetadataV2, GroupMetadataV2, MetadataV2,
},
v3::{
array::{
Expand All @@ -25,6 +25,7 @@ use crate::{
},
ArrayMetadataV3, GroupMetadataV3, MetadataV3,
},
Endianness,
};

use super::v3::array::data_type::DataTypeMetadataV3;
Expand Down Expand Up @@ -61,80 +62,27 @@ pub enum ArrayMetadataV2ToV3ConversionError {
Other(String),
}

/// Convert Zarr V2 array metadata to V3.
/// Convert Zarr V2 codec metadata to the equivalent Zarr V3 codec metadata.
///
/// # Errors
/// Returns an [`ArrayMetadataV2ToV3ConversionError`] if the metadata is invalid or is not compatible with Zarr V3 metadata.
#[allow(clippy::too_many_lines)]
pub fn array_metadata_v2_to_v3(
array_metadata_v2: &ArrayMetadataV2,
) -> Result<ArrayMetadataV3, ArrayMetadataV2ToV3ConversionError> {
let shape = array_metadata_v2.shape.clone();
let chunk_grid = MetadataV3::new_with_serializable_configuration(
crate::v3::array::chunk_grid::regular::IDENTIFIER,
&RegularChunkGridConfiguration {
chunk_shape: array_metadata_v2.chunks.clone(),
},
)?;

let (Ok(data_type), endianness) = (
data_type_metadata_v2_to_v3_data_type(&array_metadata_v2.dtype),
data_type_metadata_v2_to_endianness(&array_metadata_v2.dtype)
.map_err(ArrayMetadataV2ToV3ConversionError::InvalidEndianness)?,
) else {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType(
match &array_metadata_v2.dtype {
DataTypeMetadataV2::Simple(dtype) => dtype.clone(),
DataTypeMetadataV2::Structured(dtype) => {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType(
format!("{dtype:?}"),
))
}
},
));
};

// Fill value
let mut fill_value = array_metadata_fill_value_v2_to_v3(&array_metadata_v2.fill_value)
.or_else(|| {
// Support zarr-python encoded string arrays with a `null` fill value
match data_type.name().as_str() {
"string" => Some(FillValueMetadataV3::String(String::new())),
_ => None,
}
})
.ok_or_else(|| {
// TODO: How best to deal with null fill values? What do other implementations do?
ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue(
data_type.to_string(),
array_metadata_v2.fill_value.clone(),
)
})?;
if data_type.name() == "bool" {
// Map a 0/1 scalar fill value to a bool
if let Some(fill_value_uint) = fill_value.try_as_uint::<u64>() {
if fill_value_uint == 0 {
fill_value = FillValueMetadataV3::Bool(false);
} else if fill_value_uint == 1 {
fill_value = FillValueMetadataV3::Bool(true);
} else {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue(
data_type.to_string(),
array_metadata_v2.fill_value.clone(),
));
}
}
}

pub fn codec_metadata_v2_to_v3(
order: ArrayMetadataV2Order,
dimensionality: usize,
data_type: &DataTypeMetadataV3,
endianness: Option<Endianness>,
filters: &Option<Vec<MetadataV2>>,
compressor: &Option<MetadataV2>,
) -> Result<Vec<MetadataV3>, ArrayMetadataV2ToV3ConversionError> {
let mut codecs: Vec<MetadataV3> = vec![];

// Array-to-array codecs
if array_metadata_v2.order == ArrayMetadataV2Order::F {
if order == ArrayMetadataV2Order::F {
let transpose_metadata = MetadataV3::new_with_serializable_configuration(
crate::v3::array::codec::transpose::IDENTIFIER,
&TransposeCodecConfigurationV1 {
order: {
let f_order: Vec<usize> = (0..array_metadata_v2.shape.len()).rev().collect();
let f_order: Vec<usize> = (0..dimensionality).rev().collect();
unsafe {
// SAFETY: f_order is the reversed range of dimension indices, so it is a valid permutation
TransposeOrder::new(&f_order).unwrap_unchecked()
Expand All @@ -147,7 +95,7 @@ pub fn array_metadata_v2_to_v3(

// Filters (array to array or array to bytes codecs)
let mut has_array_to_bytes = false;
if let Some(filters) = &array_metadata_v2.filters {
if let Some(filters) = filters {
for filter in filters {
// TODO: Add a V2 registry with V2 to V3 conversion functions
match filter.id() {
Expand All @@ -170,7 +118,7 @@ pub fn array_metadata_v2_to_v3(
}

// Compressor (array to bytes codec)
if let Some(compressor) = &array_metadata_v2.compressor {
if let Some(compressor) = compressor {
#[allow(clippy::single_match)]
match compressor.id() {
crate::v2::array::codec::zfpy::IDENTIFIER => {
Expand Down Expand Up @@ -206,7 +154,7 @@ pub fn array_metadata_v2_to_v3(
}

// Compressor (bytes to bytes codec)
if let Some(compressor) = &array_metadata_v2.compressor {
if let Some(compressor) = compressor {
match compressor.id() {
crate::v2::array::codec::zfpy::IDENTIFIER
| crate::v3::array::codec::pcodec::IDENTIFIER => {
Expand All @@ -216,7 +164,7 @@ pub fn array_metadata_v2_to_v3(
let blosc = serde_json::from_value::<BloscCodecConfigurationNumcodecs>(
serde_json::to_value(compressor.configuration())?,
)?;
let configuration = codec_blosc_v2_numcodecs_to_v3(&blosc, &data_type);
let configuration = codec_blosc_v2_numcodecs_to_v3(&blosc, data_type);
codecs.push(MetadataV3::new_with_serializable_configuration(
crate::v3::array::codec::blosc::IDENTIFIER,
&configuration,
Expand All @@ -239,6 +187,83 @@ pub fn array_metadata_v2_to_v3(
};
}

Ok(codecs)
}

/// Convert Zarr V2 array metadata to V3.
///
/// # Errors
/// Returns an [`ArrayMetadataV2ToV3ConversionError`] if the metadata is invalid or is not compatible with Zarr V3 metadata.
#[allow(clippy::too_many_lines)]
pub fn array_metadata_v2_to_v3(
array_metadata_v2: &ArrayMetadataV2,
) -> Result<ArrayMetadataV3, ArrayMetadataV2ToV3ConversionError> {
let shape = array_metadata_v2.shape.clone();
let chunk_grid = MetadataV3::new_with_serializable_configuration(
crate::v3::array::chunk_grid::regular::IDENTIFIER,
&RegularChunkGridConfiguration {
chunk_shape: array_metadata_v2.chunks.clone(),
},
)?;

let (Ok(data_type), endianness) = (
data_type_metadata_v2_to_v3_data_type(&array_metadata_v2.dtype),
data_type_metadata_v2_to_endianness(&array_metadata_v2.dtype)
.map_err(ArrayMetadataV2ToV3ConversionError::InvalidEndianness)?,
) else {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType(
match &array_metadata_v2.dtype {
DataTypeMetadataV2::Simple(dtype) => dtype.clone(),
DataTypeMetadataV2::Structured(dtype) => {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedDataType(
format!("{dtype:?}"),
))

Check warning on line 220 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L215-L220

Added lines #L215 - L220 were not covered by tests
}
},
));
};

// Fill value
let mut fill_value = array_metadata_fill_value_v2_to_v3(&array_metadata_v2.fill_value)
.or_else(|| {
// Support zarr-python encoded string arrays with a `null` fill value
match data_type.name().as_str() {
"string" => Some(FillValueMetadataV3::String(String::new())),
_ => None,

Check warning on line 232 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L232

Added line #L232 was not covered by tests
}
})
.ok_or_else(|| {
// TODO: How best to deal with null fill values? What do other implementations do?
ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue(
data_type.to_string(),
array_metadata_v2.fill_value.clone(),
)

Check warning on line 240 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L236-L240

Added lines #L236 - L240 were not covered by tests
})?;
if data_type.name() == "bool" {
// Map a 0/1 scalar fill value to a bool
if let Some(fill_value_uint) = fill_value.try_as_uint::<u64>() {
if fill_value_uint == 0 {
fill_value = FillValueMetadataV3::Bool(false);
} else if fill_value_uint == 1 {
fill_value = FillValueMetadataV3::Bool(true);
} else {
return Err(ArrayMetadataV2ToV3ConversionError::UnsupportedFillValue(
data_type.to_string(),
array_metadata_v2.fill_value.clone(),
));

Check warning on line 253 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L244-L253

Added lines #L244 - L253 were not covered by tests
}
}

Check warning on line 255 in zarrs_metadata/src/v2_to_v3.rs

View check run for this annotation

Codecov / codecov/patch

zarrs_metadata/src/v2_to_v3.rs#L255

Added line #L255 was not covered by tests
}

let codecs = codec_metadata_v2_to_v3(
array_metadata_v2.order,
array_metadata_v2.shape.len(),
&data_type,
endianness,
&array_metadata_v2.filters,
&array_metadata_v2.compressor,
)?;

let chunk_key_encoding = MetadataV3::new_with_serializable_configuration(
crate::v3::array::chunk_key_encoding::v2::IDENTIFIER,
&V2ChunkKeyEncodingConfiguration {
Expand Down

0 comments on commit ee0e0f6

Please sign in to comment.