23
23
24
24
try :
25
25
import omfiles
26
- import omfiles ._numcodecs
27
26
except ModuleNotFoundError : # pragma: no cover
28
27
raise ImportError (
29
28
"omfiles is required for kerchunking Open-Meteo files. Please install with "
@@ -183,11 +182,9 @@ def __init__(
183
182
inline_threshold = 500 ,
184
183
storage_options = None ,
185
184
chunk_no = None ,
186
- domain = None ,
187
- reference_time = None ,
188
- time_step = 3600 ,
185
+ domain = None
189
186
):
190
- # Initialize a reader for your om file
187
+ # Initialize a reader for the om file
191
188
if isinstance (om_file , (pathlib .Path , str )):
192
189
fs , path = fsspec .core .url_to_fs (om_file , ** (storage_options or {}))
193
190
self .input_file = fs .open (path , "rb" )
@@ -204,7 +201,10 @@ def __init__(
204
201
self .inline = inline_threshold
205
202
self .store_dict = {}
206
203
self .store = dict_to_store (self .store_dict )
207
- self .name = "data" # FIXME: This should be the name from om-variable
204
+ # FIXME: This should be the name from om-variable, but currently variables don't need to be named in omfiles
205
+ # self.name = self.reader.name
206
+ # For now, hardcode the name to "data"
207
+ self .name = "data"
208
208
209
209
if domain is not None and chunk_no is not None :
210
210
start_step = chunk_number_to_start_time (domain = domain , chunk_no = chunk_no )
@@ -225,15 +225,19 @@ def translate(self):
225
225
add_offset = self .reader .add_offset
226
226
lut = self .reader .get_complete_lut ()
227
227
228
- # Get dimension names if available, otherwise use defaults
229
- # FIXME: Currently we don't have dimension names exposed by the reader (or even necessarily in the file)
230
- dim_names = getattr (self .reader , "dimension_names" , ["x" , "y" , "time" ])
228
+ assert len (shape ) == 3 , "Only 3D arrays are currently supported"
229
+ assert len (chunks ) == 3 , "Only 3D arrays are currently supported"
230
+
231
+ # FIXME: There is currently no established convention for storing dimension names in om files.
232
+ # This can easily be achieved via the hierarchical structure, but it has not been finalized yet.
233
+ # For now, just hardcode the dimension names
234
+ dim_names = ["x" , "y" , "time" ]
231
235
232
236
# Calculate number of chunks in each dimension
233
237
chunks_per_dim = [math .ceil (s / c ) for s , c in zip (shape , chunks )]
234
238
235
239
# 2. Create Zarr array metadata (.zarray)
236
- blocksize = chunks [0 ] * chunks [1 ] * chunks [2 ] if len ( chunks ) >= 3 else chunks [ 0 ] * chunks [ 1 ]
240
+ blocksize = chunks [0 ] * chunks [1 ] * chunks [2 ]
237
241
238
242
zarray = {
239
243
"zarr_format" : 2 ,
@@ -263,31 +267,27 @@ def translate(self):
263
267
for chunk_idx in range (len (lut ) - 1 ):
264
268
# Calculate chunk coordinates (i,j,k) from linear index
265
269
chunk_coords = self ._get_chunk_coords (chunk_idx , chunks_per_dim )
270
+ chunk_key = self .name + "/" + "." .join (map (str , chunk_coords ))
266
271
267
- # Calculate chunk size.
268
- # Loop index is defined so this is safe!
269
- chunk_size = lut [chunk_idx + 1 ] - lut [chunk_idx ]
270
-
271
- # Add to references
272
- key = self .name + "/" + "." .join (map (str , chunk_coords ))
272
+ # Calculate chunk offset and chunk size
273
+ chunk_offset = lut [chunk_idx ]
274
+ chunk_size = lut [chunk_idx + 1 ] - chunk_offset
273
275
274
276
# Check if chunk is small enough to inline
275
277
if self .inline > 0 and chunk_size < self .inline :
276
278
# Read the chunk data and inline it
277
- self .input_file .seek (lut [ chunk_idx ] )
279
+ self .input_file .seek (chunk_offset )
278
280
data = self .input_file .read (chunk_size )
279
- try :
280
- # Try to decode as ASCII
281
- self .store_dict [key ] = data .decode ('ascii' )
282
- except UnicodeDecodeError :
283
- # If not ASCII, encode as base64
284
- self .store_dict [key ] = b"base64:" + base64 .b64encode (data )
281
+ # Encode as base64, similar to what is done in hdf.py
282
+ self .store_dict [chunk_key ] = b"base64:" + base64 .b64encode (data )
285
283
else :
286
284
# Otherwise store as reference
287
- self .store_dict [key ] = [self .url , lut [ chunk_idx ] , chunk_size ]
285
+ self .store_dict [chunk_key ] = [self .url , chunk_offset , chunk_size ]
288
286
289
- # 5. Create coordinate arrays. TODO: This needs to be improved
290
- # Add coordinate arrays for ALL dimensions
287
+ # 5. Create coordinate arrays.
288
+ # TODO: This needs to be improved, because we need coordinates for all dimensions
289
+ # Grid definitions / coordinate arrays might be calculated directly in python-omfiles in the future:
290
+ # https://github.com/open-meteo/python-omfiles/pull/32/files
291
291
for i , dim_name in enumerate (dim_names ):
292
292
dim_size = shape [i ]
293
293
if dim_name == "time" :
@@ -299,10 +299,8 @@ def translate(self):
299
299
300
300
# Convert to proper format for return
301
301
if self .spec < 1 :
302
- print ("self.spec < 1" )
303
302
return self .store
304
303
else :
305
- print ("translate_refs_serializable" )
306
304
translate_refs_serializable (self .store_dict )
307
305
store = _encode_for_JSON (self .store_dict )
308
306
return {"version" : 1 , "refs" : store }
@@ -319,7 +317,7 @@ def _add_time_coordinate(self, time_dim, time_axis=0):
319
317
320
318
# Format the reference time as CF-compliant string
321
319
if isinstance (ref_time , datetime .datetime ):
322
- # Calculate hours since epoch (1970-01-01)
320
+ # Calculate seconds since epoch (1970-01-01)
323
321
epoch = datetime .datetime (1970 , 1 , 1 , 0 , 0 , 0 )
324
322
seconds_since_epoch = int ((ref_time - epoch ).total_seconds ())
325
323
@@ -367,12 +365,6 @@ def _add_time_coordinate(self, time_dim, time_axis=0):
367
365
# Add time values inline (they're small)
368
366
self .store_dict [f"{ time_dim_name } /0" ] = time_values .tobytes ()
369
367
370
- # Debug info
371
- print (f"Created time coordinate '{ time_dim_name } ' with { time_dim } values" )
372
- print (f"Time units: { units } " )
373
- if time_dim > 0 :
374
- print (f"First timestamp: { time_values [0 ]} seconds since 1970-01-01, Last: { time_values [- 1 ]} " )
375
-
376
368
def _get_chunk_coords (self , idx , chunks_per_dim ):
377
369
"""Convert linear chunk index to multidimensional coordinates
378
370
0 commit comments