Skip to content

Commit 0758c17

Browse files
committed
separate patternbased implementation into separate file
1 parent d1ca1ab commit 0758c17

File tree

3 files changed

+145
-124
lines changed

3 files changed

+145
-124
lines changed

src/JLD2.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool,
180180
parallel_read::Bool=false,
181181
plain::Bool=false
182182
) where T<:Union{Type{IOStream},Type{MmapIO}}
183-
183+
184184
mmaparrays && @warn "mmaparrays keyword is currently ignored" maxlog = 1
185185
filters = Filters.normalize_filters(compress)
186186

@@ -502,6 +502,7 @@ using .Filters: WrittenFilterPipeline, FilterPipeline, iscompressed
502502
using .Filters: Shuffle, Deflate, ZstdFilter
503503

504504
include("virtual_datasets.jl")
505+
include("virtual_datasets_patternbased.jl")
505506
include("datasets.jl")
506507
include("global_heaps.jl")
507508
include("fractal_heaps.jl")

src/virtual_datasets.jl

Lines changed: 0 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -255,54 +255,6 @@ function assign_to_vds!(result, source_subset, selection::DataspaceSelection, vd
255255
end
256256
end
257257

258-
# Calculate the selection for a specific block in a pattern-based VDS
259-
# For Eiger: template has START(0), STRIDE(5), COUNT(H5S_UNLIMITED), BLOCK(5)
260-
# Block 0: START(0), STRIDE(1), COUNT(1), BLOCK(5) → selects 0:4
261-
# Block 1: START(5), STRIDE(1), COUNT(1), BLOCK(5) → selects 5:9
262-
function calculate_block_selection(template::HyperslabSelection, block_idx::Int)
    # Turn the unlimited `template` into the concrete selection of block `block_idx`.
    # Work on copies so the template's own arrays are never written to.
    new_start = copy(template.start)
    new_stride = copy(template.stride)
    new_count = copy(template.count)
    new_block = copy(template.block)

    for (dim, cnt) in enumerate(template.count)
        # Dimensions with a finite count are passed through unchanged.
        is_unlimited(cnt) || continue

        # Pattern dimension: place exactly one block at offset block_idx * stride.
        new_start[dim] = template.start[dim] + UInt64(block_idx) * template.stride[dim]
        new_stride[dim] = UInt64(1)
        new_count[dim] = UInt64(1)
        # new_block[dim] is kept: it is the size of each individual block.
    end

    return HyperslabSelection(new_start, new_stride, new_count, new_block)
end
281-
282-
# Calculate VDS dimensions dynamically from pattern files with H5S_UNLIMITED
283-
function calculate_dynamic_vds_dims(f::JLDFile, mappings::Vector{VirtualMapping},
                                    static_dims_hdf5::NTuple{N, Int64}) where N
    # Returns the VDS dimensions in Julia order (reversed HDF5 order) as an N-tuple.
    # Starts from the static dimensions (HDF5 order); some may be placeholders that
    # the pattern mappings below enlarge. A dimension is never shrunk.
    dims_hdf5 = collect(static_dims_hdf5)

    for mapping in mappings
        # Only `%b` pattern mappings contribute a dynamic extent.
        occursin("%b", mapping.source_filename) || continue
        num_files = length(expand_file_pattern(mapping.source_filename, f))
        num_files > 0 || continue

        sel = mapping.vds_selection
        for (i, cnt) in enumerate(sel.count)
            is_unlimited(cnt) || continue
            # Example: start=0, stride=5, num_files=3 → dimension size = 0 + 3*5 = 15
            dim_size = Int(sel.start[i]) + num_files * Int(sel.stride[i])
            dims_hdf5[i] = max(dims_hdf5[i], dim_size)
        end
    end

    # N is a static parameter, so constructing the reversed tuple through Val(N)
    # keeps the return type inferrable (a Vector-to-Tuple conversion would not).
    return ntuple(i -> dims_hdf5[N - i + 1], Val(N))
end
305-
306258
# Combine virtual mappings into a single dataset
307259
function combine_virtual_mappings(f::JLDFile, mappings::Vector{VirtualMapping},
308260
dataspace::ReadDataspace, dt::H5Datatype)
@@ -586,81 +538,6 @@ function create_virtual_dataset(parent::Union{JLDFile, Group}, name::String,
586538
return offset
587539
end
588540

589-
"""
590-
create_virtual_dataset(parent, name, source_pattern, dataset_name, src_dims, element_type, unlimited_dims)
591-
592-
Create pattern-based virtual dataset with H5S_UNLIMITED for dynamic file discovery.
593-
594-
# Arguments
595-
- `parent::Union{JLDFile, Group}`: Container file or group
596-
- `name::String`: Virtual dataset name
597-
- `source_pattern::String`: Pattern with %b placeholder (e.g., "detector-%b.jld2")
598-
- `dataset_name::String`: Dataset name in each source file
599-
- `src_dims::Tuple`: Dimensions of each source dataset (Julia order)
600-
- `element_type::Type`: Element type (e.g., Float32, Int32)
601-
- `unlimited_dims::Tuple{Vararg{Int}}`: Tuple of dimension indices that are unlimited (1-based Julia indexing)
602-
603-
Creates a VDS that expands dynamically as more source files matching the pattern are added.
604-
The pattern %b will be replaced with 0, 1, 2, ... to find source files at read time.
605-
606-
The initial VDS dimensions are computed from existing files at read time.
607-
608-
# Example
609-
```julia
610-
# Each source file has 10×10×5 frames, VDS concatenates along last dimension (dimension 3)
611-
jldopen("vds.jld2", "w") do f
612-
create_virtual_dataset(f, "all_frames", "detector-%b.jld2", "frames",
613-
(10, 10, 5), Float32, (3,)) # dimension 3 is unlimited
614-
end
615-
```
616-
"""
617-
function create_virtual_dataset(parent::Union{JLDFile, Group}, name::String,
                                source_pattern::String, dataset_name::String,
                                src_dims::Tuple, element_type::Type,
                                unlimited_dims::Tuple{Vararg{Int}})
    nd = length(src_dims)

    # Reject the first index that does not address a dimension of the source dataset.
    for d in unlimited_dims
        if !(1 <= d <= nd)
            throw(ArgumentError("Unlimited dimension index $d out of range [1, $nd]"))
        end
    end

    if isempty(unlimited_dims)
        @warn "Pattern contains %b but no unlimited dimensions specified - VDS will have fixed size"
    end

    # Source side: a single block covering the entire source dataset (HDF5 order).
    whole_source = HyperslabSelection(
        zeros(UInt64, nd),                   # start at 0
        ones(UInt64, nd),                    # stride = 1
        ones(UInt64, nd),                    # count = 1
        collect(UInt64, reverse(src_dims)),  # block = source dimensions
    )

    # VDS side: same block shape; stride/count default to 1 and are overridden
    # below for every unlimited dimension.
    target_start = zeros(UInt64, nd)
    target_stride = ones(UInt64, nd)
    target_count = ones(UInt64, nd)
    target_block = collect(UInt64, reverse(src_dims))
    for d in unlimited_dims
        h = nd - d + 1                            # Julia index → HDF5 index
        target_stride[h] = UInt64(src_dims[d])    # step by one source extent per file
        target_count[h] = H5S_UNLIMITED % UInt64  # reinterpret the sentinel as UInt64
    end
    target = HyperslabSelection(target_start, target_stride, target_count, target_block)

    pattern_mapping = VirtualMapping(source_pattern, dataset_name, whole_source, target)

    # The dataset is created with the extent of one source dataset; only the
    # maximum dimensions carry the unlimited marker.
    max_dims = ntuple(i -> i in unlimited_dims ? H5S_UNLIMITED : src_dims[i], nd)

    return create_virtual_dataset(parent, name, src_dims, element_type, [pattern_mapping];
                                  max_dims=max_dims)
end
663-
664541
"""
665542
create_virtual_dataset(parent, name, source_files, dataset_name)
666543
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# Eiger Detector Virtual Dataset Support
2+
#
3+
# Eiger detectors produce data files with pattern-based naming (detector-0.h5, detector-1.h5, ...)
4+
# where frames are stacked along a dimension that should be unlimited for dynamic file discovery.
5+
# This file provides specialized support for this use case.
6+
7+
"""
8+
calculate_block_selection(template::HyperslabSelection, block_idx::Int)
9+
10+
Calculate the selection for a specific block in a pattern-based VDS.
11+
12+
For Eiger-style VDS with H5S_UNLIMITED:
13+
- Template has START(0), STRIDE(5), COUNT(H5S_UNLIMITED), BLOCK(5)
14+
- Block 0: START(0), STRIDE(1), COUNT(1), BLOCK(5) → selects 0:4
15+
- Block 1: START(5), STRIDE(1), COUNT(1), BLOCK(5) → selects 5:9
16+
17+
This converts the unlimited template into a concrete selection for a specific file.
18+
"""
19+
function calculate_block_selection(template::HyperslabSelection, block_idx::Int)
    # Turn the unlimited `template` into the concrete selection of block `block_idx`.
    # Work on copies so the template's own arrays are never written to.
    new_start = copy(template.start)
    new_stride = copy(template.stride)
    new_count = copy(template.count)
    new_block = copy(template.block)

    for (dim, cnt) in enumerate(template.count)
        # Dimensions with a finite count are passed through unchanged.
        is_unlimited(cnt) || continue

        # Pattern dimension: place exactly one block at offset block_idx * stride.
        new_start[dim] = template.start[dim] + UInt64(block_idx) * template.stride[dim]
        new_stride[dim] = UInt64(1)
        new_count[dim] = UInt64(1)
        # new_block[dim] is kept: it is the size of each individual block.
    end

    return HyperslabSelection(new_start, new_stride, new_count, new_block)
end
38+
39+
"""
40+
calculate_dynamic_vds_dims(f::JLDFile, mappings, static_dims_hdf5)
41+
42+
Calculate VDS dimensions dynamically from pattern files with H5S_UNLIMITED.
43+
44+
When a VDS uses file patterns (%b) with H5S_UNLIMITED count, the actual dimensions
45+
are determined at read time by counting how many files match the pattern.
46+
"""
47+
function calculate_dynamic_vds_dims(f::JLDFile, mappings::Vector{VirtualMapping},
                                    static_dims_hdf5::NTuple{N, Int64}) where N
    # Returns the VDS dimensions in Julia order (reversed HDF5 order) as an N-tuple.
    # Starts from the static dimensions (HDF5 order); some may be placeholders that
    # the pattern mappings below enlarge. A dimension is never shrunk.
    dims_hdf5 = collect(static_dims_hdf5)

    for mapping in mappings
        # Only `%b` pattern mappings contribute a dynamic extent.
        occursin("%b", mapping.source_filename) || continue
        num_files = length(expand_file_pattern(mapping.source_filename, f))
        num_files > 0 || continue

        sel = mapping.vds_selection
        for (i, cnt) in enumerate(sel.count)
            is_unlimited(cnt) || continue
            # Example: start=0, stride=5, num_files=3 → dimension size = 0 + 3*5 = 15
            dim_size = Int(sel.start[i]) + num_files * Int(sel.stride[i])
            dims_hdf5[i] = max(dims_hdf5[i], dim_size)
        end
    end

    # N is a static parameter, so constructing the reversed tuple through Val(N)
    # keeps the return type inferrable (a Vector-to-Tuple conversion would not).
    return ntuple(i -> dims_hdf5[N - i + 1], Val(N))
end
69+
70+
"""
71+
create_virtual_dataset(parent, name, source_pattern, dataset_name, src_dims, element_type, unlimited_dims)
72+
73+
Create Eiger-style pattern-based virtual dataset with H5S_UNLIMITED for dynamic file discovery.
74+
75+
# Arguments
76+
- `parent::Union{JLDFile, Group}`: Container file or group
77+
- `name::String`: Virtual dataset name
78+
- `source_pattern::String`: Pattern with %b placeholder (e.g., "detector-%b.jld2")
79+
- `dataset_name::String`: Dataset name in each source file
80+
- `src_dims::Tuple`: Dimensions of each source dataset (Julia order)
81+
- `element_type::Type`: Element type (e.g., Float32, Int32)
82+
- `unlimited_dims::Tuple{Vararg{Int}}`: Tuple of dimension indices that are unlimited (1-based Julia indexing)
83+
84+
Creates a VDS that expands dynamically as more source files matching the pattern are added.
85+
The pattern %b will be replaced with 0, 1, 2, ... to find source files at read time.
86+
87+
The initial VDS dimensions are computed from existing files at read time.
88+
89+
# Example
90+
```julia
91+
# Each source file has 10×10×5 frames, VDS concatenates along last dimension (dimension 3)
92+
jldopen("vds.jld2", "w") do f
93+
create_virtual_dataset(f, "all_frames", "detector-%b.jld2", "frames",
94+
(10, 10, 5), Float32, (3,)) # dimension 3 is unlimited
95+
end
96+
```
97+
"""
98+
function create_virtual_dataset(parent::Union{JLDFile, Group}, name::String,
                                source_pattern::String, dataset_name::String,
                                src_dims::Tuple, element_type::Type,
                                unlimited_dims::Tuple{Vararg{Int}})
    nd = length(src_dims)

    # Reject the first index that does not address a dimension of the source dataset.
    for d in unlimited_dims
        if !(1 <= d <= nd)
            throw(ArgumentError("Unlimited dimension index $d out of range [1, $nd]"))
        end
    end

    if isempty(unlimited_dims)
        @warn "Pattern contains %b but no unlimited dimensions specified - VDS will have fixed size"
    end

    # Source side: a single block covering the entire source dataset (HDF5 order).
    whole_source = HyperslabSelection(
        zeros(UInt64, nd),                   # start at 0
        ones(UInt64, nd),                    # stride = 1
        ones(UInt64, nd),                    # count = 1
        collect(UInt64, reverse(src_dims)),  # block = source dimensions
    )

    # VDS side: same block shape; stride/count default to 1 and are overridden
    # below for every unlimited dimension.
    target_start = zeros(UInt64, nd)
    target_stride = ones(UInt64, nd)
    target_count = ones(UInt64, nd)
    target_block = collect(UInt64, reverse(src_dims))
    for d in unlimited_dims
        h = nd - d + 1                            # Julia index → HDF5 index
        target_stride[h] = UInt64(src_dims[d])    # step by one source extent per file
        target_count[h] = H5S_UNLIMITED % UInt64  # reinterpret the sentinel as UInt64
    end
    target = HyperslabSelection(target_start, target_stride, target_count, target_block)

    pattern_mapping = VirtualMapping(source_pattern, dataset_name, whole_source, target)

    # The dataset is created with the extent of one source dataset; only the
    # maximum dimensions carry the unlimited marker.
    max_dims = ntuple(i -> i in unlimited_dims ? H5S_UNLIMITED : src_dims[i], nd)

    return create_virtual_dataset(parent, name, src_dims, element_type, [pattern_mapping];
                                  max_dims=max_dims)
end

0 commit comments

Comments
 (0)