|
| 1 | +# Eiger Detector Virtual Dataset Support |
| 2 | +# |
| 3 | +# Eiger detectors produce data files with pattern-based naming (detector-0.h5, detector-1.h5, ...) |
| 4 | +# where frames are stacked along a dimension that should be unlimited for dynamic file discovery. |
| 5 | +# This module provides specialized support for this use case. |
| 6 | + |
"""
    calculate_block_selection(template::HyperslabSelection, block_idx::Int)

Return the concrete hyperslab occupied by block number `block_idx` (0-based) of a
pattern-based virtual dataset.

`template` is the VDS-side selection of the mapping. Every dimension whose count is
unlimited (as reported by `is_unlimited`) is rewritten to address exactly one block:

- `start` becomes `template.start + block_idx * template.stride`
- `stride` and `count` both become `1`
- `block` is left untouched, since it is the extent of a single block

Dimensions with a finite count are copied through unchanged. The vectors stored in
`template` are never mutated; the result is built from copies.

# Example
With a template of START(0), STRIDE(5), COUNT(H5S_UNLIMITED), BLOCK(5):

- block 0 gives START(0), STRIDE(1), COUNT(1), BLOCK(5), i.e. elements 0:4
- block 1 gives START(5), STRIDE(1), COUNT(1), BLOCK(5), i.e. elements 5:9
"""
function calculate_block_selection(template::HyperslabSelection, block_idx::Int)
    # Work on copies so the template can be reused for every block index.
    new_start = copy(template.start)
    new_stride = copy(template.stride)
    new_count = copy(template.count)
    new_block = copy(template.block)

    for (dim, template_count) in enumerate(template.count)
        # Only pattern (unlimited) dimensions need to be pinned to one block.
        is_unlimited(template_count) || continue

        new_start[dim] = template.start[dim] + UInt64(block_idx) * template.stride[dim]
        new_stride[dim] = UInt64(1)
        new_count[dim] = UInt64(1)
    end

    return HyperslabSelection(new_start, new_stride, new_count, new_block)
end
| 38 | + |
"""
    calculate_dynamic_vds_dims(f::JLDFile, mappings, static_dims_hdf5)

Compute the dimensions of a virtual dataset whose mappings combine a `%b` file pattern
with an `H5S_UNLIMITED` count.

For such mappings the extent is not fixed when the dataset is written; it is derived at
read time from the number of files that `expand_file_pattern` finds for the pattern.

# Arguments
- `f::JLDFile`: file handed to `expand_file_pattern` when resolving each source pattern.
- `mappings::Vector{VirtualMapping}`: mappings of the virtual dataset. Mappings whose
  `source_filename` has no `%b`, or whose pattern matches no file, are ignored.
- `static_dims_hdf5::NTuple{N, Int64}`: stored dimensions in HDF5 order. They act as a
  lower bound: a dimension is only ever enlarged, never shrunk.

# Returns
An `NTuple{N, Int64}` holding the dimensions in Julia order (the reverse of HDF5 order).

# Extent calculation
Block `k` (0-based) of an unlimited dimension starts at `start + k * stride` and is
`block` elements long, so `n` files cover `start + (n - 1) * stride + block` elements.
For the usual Eiger layout (`stride == block`) this equals `start + n * stride`, e.g.
start=0, stride=5, block=5 and 3 files give 15.
"""
function calculate_dynamic_vds_dims(f::JLDFile, mappings::Vector{VirtualMapping},
                                    static_dims_hdf5::NTuple{N, Int64}) where N
    # Start from the stored dimensions (some of them may be placeholders).
    dynamic_dims_hdf5 = collect(Int64, static_dims_hdf5)

    for mapping in mappings
        occursin("%b", mapping.source_filename) || continue
        num_files = length(expand_file_pattern(mapping.source_filename, f))
        num_files > 0 || continue

        selection = mapping.vds_selection
        for (i, count) in enumerate(selection.count)
            is_unlimited(count) || continue
            # End of the last block: it begins at start + (n - 1) * stride and spans
            # `block` elements. Using n * stride would be wrong whenever stride != block.
            dim_size = Int64(selection.start[i]) +
                       (num_files - 1) * Int64(selection.stride[i]) +
                       Int64(selection.block[i])
            dynamic_dims_hdf5[i] = max(dynamic_dims_hdf5[i], dim_size)
        end
    end

    # Reverse HDF5 order into Julia order; Val(N) keeps the tuple length inferrable.
    return ntuple(i -> dynamic_dims_hdf5[N - i + 1], Val(N))
end
| 69 | + |
"""
    create_virtual_dataset(parent, name, source_pattern, dataset_name, src_dims, element_type, unlimited_dims)

Create an Eiger-style, pattern-based virtual dataset that uses `H5S_UNLIMITED` so that
source files can be discovered dynamically.

A single mapping is built. Its source selection covers one whole source dataset. Its
VDS selection places blocks of the source size next to each other along every
dimension listed in `unlimited_dims`, using a stride equal to the source size in that
dimension and an unlimited count.

# Arguments
- `parent::Union{JLDFile, Group}`: file or group that will contain the virtual dataset
- `name::String`: name of the virtual dataset
- `source_pattern::String`: source file pattern with a `%b` placeholder
  (e.g. `"detector-%b.jld2"`); `%b` is replaced with 0, 1, 2, ... at read time
- `dataset_name::String`: name of the dataset inside each source file
- `src_dims::Tuple`: dimensions of each source dataset, in Julia order
- `element_type::Type`: element type (e.g. `Float32`, `Int32`)
- `unlimited_dims::Tuple{Vararg{Int}}`: 1-based Julia indices of the unlimited dimensions

The dataset is created with `src_dims` as its initial dimensions and with
`H5S_UNLIMITED` as the maximum for every unlimited dimension; the actual dimensions are
computed from the existing files at read time.

# Throws
- `ArgumentError` if an entry of `unlimited_dims` lies outside `1:length(src_dims)`.

A warning is logged when `unlimited_dims` is empty.

# Example
```julia
# Each source file holds 10×10×5 frames; the VDS concatenates along dimension 3
jldopen("vds.jld2", "w") do f
    create_virtual_dataset(f, "all_frames", "detector-%b.jld2", "frames",
                           (10, 10, 5), Float32, (3,))  # dimension 3 is unlimited
end
```
"""
function create_virtual_dataset(parent::Union{JLDFile, Group}, name::String,
                                source_pattern::String, dataset_name::String,
                                src_dims::Tuple, element_type::Type, unlimited_dims::Tuple{Vararg{Int}})
    ndims = length(src_dims)

    # Reject indices that do not name a dimension of the source dataset.
    for dim_idx in unlimited_dims
        if !(1 <= dim_idx <= ndims)
            throw(ArgumentError("Unlimited dimension index $dim_idx out of range [1, $ndims]"))
        end
    end

    if isempty(unlimited_dims)
        @warn "Pattern contains %b but no unlimited dimensions specified - VDS will have fixed size"
    end

    # Source dimensions in HDF5 order; this is the block size of both selections.
    block_hdf5 = collect(UInt64, reverse(src_dims))

    # Source side: one block covering the entire source dataset.
    src_selection = HyperslabSelection(
        zeros(UInt64, ndims),
        ones(UInt64, ndims),
        ones(UInt64, ndims),
        block_hdf5,
    )

    # VDS side: a plain single-block selection by default ...
    vds_stride = fill(UInt64(1), ndims)
    vds_count = fill(UInt64(1), ndims)
    # ... and a repeating pattern along each unlimited dimension.
    for dim_idx in unlimited_dims
        hdf5_idx = ndims + 1 - dim_idx                      # Julia index -> HDF5 index
        vds_stride[hdf5_idx] = UInt64(src_dims[dim_idx])    # step by one source extent
        vds_count[hdf5_idx] = H5S_UNLIMITED % UInt64        # -1 as UInt64
    end

    # The two selections must not share the block vector, hence the copy.
    vds_selection = HyperslabSelection(
        zeros(UInt64, ndims),
        vds_stride,
        vds_count,
        copy(block_hdf5),
    )
    mapping = VirtualMapping(source_pattern, dataset_name, src_selection, vds_selection)

    # Initial dimensions equal the source dimensions (the real extent is computed at
    # read time); unlimited dimensions get H5S_UNLIMITED as their maximum.
    vds_dims = src_dims
    max_dims = ntuple(i -> i in unlimited_dims ? H5S_UNLIMITED : src_dims[i], ndims)

    return create_virtual_dataset(parent, name, vds_dims, element_type, [mapping];
                                  max_dims=max_dims)
end
0 commit comments