Skip to content

Commit 7469d55

Browse files
committed
some cleanup
1 parent 48b94d2 commit 7469d55

File tree

1 file changed

+27
-35
lines changed

1 file changed

+27
-35
lines changed

kerchunk/open_meteo.py

Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
try:
2525
import omfiles
26-
import omfiles._numcodecs
2726
except ModuleNotFoundError: # pragma: no cover
2827
raise ImportError(
2928
"omfiles is required for kerchunking Open-Meteo files. Please install with "
@@ -183,11 +182,9 @@ def __init__(
183182
inline_threshold=500,
184183
storage_options=None,
185184
chunk_no=None,
186-
domain=None,
187-
reference_time=None,
188-
time_step=3600,
185+
domain=None
189186
):
190-
# Initialize a reader for your om file
187+
# Initialize a reader for the om file
191188
if isinstance(om_file, (pathlib.Path, str)):
192189
fs, path = fsspec.core.url_to_fs(om_file, **(storage_options or {}))
193190
self.input_file = fs.open(path, "rb")
@@ -204,7 +201,10 @@ def __init__(
204201
self.inline = inline_threshold
205202
self.store_dict = {}
206203
self.store = dict_to_store(self.store_dict)
207-
self.name = "data" # FIXME: This should be the name from om-variable
204+
# FIXME: This should be the name from om-variable, but currently variables don't need to be named in omfiles
205+
# self.name = self.reader.name
206+
# For now, hardcode the name to "data"
207+
self.name = "data"
208208

209209
if domain is not None and chunk_no is not None:
210210
start_step = chunk_number_to_start_time(domain=domain, chunk_no=chunk_no)
@@ -225,15 +225,19 @@ def translate(self):
225225
add_offset = self.reader.add_offset
226226
lut = self.reader.get_complete_lut()
227227

228-
# Get dimension names if available, otherwise use defaults
229-
# FIXME: Currently we don't have dimension names exposed by the reader (or even necessarily in the file)
230-
dim_names = getattr(self.reader, "dimension_names", ["x", "y", "time"])
228+
assert len(shape) == 3, "Only 3D arrays are currently supported"
229+
assert len(chunks) == 3, "Only 3D arrays are currently supported"
230+
231+
# FIXME: There is currently no established convention for storing dimension names in om files.
232+
# This can easily be achieved via the hierarchical structure, but it is not finalized yet.
233+
# For now, just hardcode the dimension names
234+
dim_names = ["x", "y", "time"]
231235

232236
# Calculate number of chunks in each dimension
233237
chunks_per_dim = [math.ceil(s/c) for s, c in zip(shape, chunks)]
234238

235239
# 2. Create Zarr array metadata (.zarray)
236-
blocksize = chunks[0] * chunks[1] * chunks[2] if len(chunks) >= 3 else chunks[0] * chunks[1]
240+
blocksize = chunks[0] * chunks[1] * chunks[2]
237241

238242
zarray = {
239243
"zarr_format": 2,
@@ -263,31 +267,27 @@ def translate(self):
263267
for chunk_idx in range(len(lut) - 1):
264268
# Calculate chunk coordinates (i,j,k) from linear index
265269
chunk_coords = self._get_chunk_coords(chunk_idx, chunks_per_dim)
270+
chunk_key = self.name + "/" + ".".join(map(str, chunk_coords))
266271

267-
# Calculate chunk size.
268-
# Loop index is defined so this is safe!
269-
chunk_size = lut[chunk_idx + 1] - lut[chunk_idx]
270-
271-
# Add to references
272-
key = self.name + "/" + ".".join(map(str, chunk_coords))
272+
# Calculate chunk offset and chunk size
273+
chunk_offset = lut[chunk_idx]
274+
chunk_size = lut[chunk_idx + 1] - chunk_offset
273275

274276
# Check if chunk is small enough to inline
275277
if self.inline > 0 and chunk_size < self.inline:
276278
# Read the chunk data and inline it
277-
self.input_file.seek(lut[chunk_idx])
279+
self.input_file.seek(chunk_offset)
278280
data = self.input_file.read(chunk_size)
279-
try:
280-
# Try to decode as ASCII
281-
self.store_dict[key] = data.decode('ascii')
282-
except UnicodeDecodeError:
283-
# If not ASCII, encode as base64
284-
self.store_dict[key] = b"base64:" + base64.b64encode(data)
281+
# Encode as base64, similar to what is done in hdf.py
282+
self.store_dict[chunk_key] = b"base64:" + base64.b64encode(data)
285283
else:
286284
# Otherwise store as reference
287-
self.store_dict[key] = [self.url, lut[chunk_idx], chunk_size]
285+
self.store_dict[chunk_key] = [self.url, chunk_offset, chunk_size]
288286

289-
# 5. Create coordinate arrays. TODO: This needs to be improved
290-
# Add coordinate arrays for ALL dimensions
287+
# 5. Create coordinate arrays.
288+
# TODO: This needs to be improved, because we need coordinates for all dimensions
289+
# Grid definitions / coordinate arrays might be calculated directly in python-omfiles in the future:
290+
# https://github.com/open-meteo/python-omfiles/pull/32/files
291291
for i, dim_name in enumerate(dim_names):
292292
dim_size = shape[i]
293293
if dim_name == "time":
@@ -299,10 +299,8 @@ def translate(self):
299299

300300
# Convert to proper format for return
301301
if self.spec < 1:
302-
print("self.spec < 1")
303302
return self.store
304303
else:
305-
print("translate_refs_serializable")
306304
translate_refs_serializable(self.store_dict)
307305
store = _encode_for_JSON(self.store_dict)
308306
return {"version": 1, "refs": store}
@@ -319,7 +317,7 @@ def _add_time_coordinate(self, time_dim, time_axis=0):
319317

320318
# Format the reference time as CF-compliant string
321319
if isinstance(ref_time, datetime.datetime):
322-
# Calculate hours since epoch (1970-01-01)
320+
# Calculate seconds since epoch (1970-01-01)
323321
epoch = datetime.datetime(1970, 1, 1, 0, 0, 0)
324322
seconds_since_epoch = int((ref_time - epoch).total_seconds())
325323

@@ -367,12 +365,6 @@ def _add_time_coordinate(self, time_dim, time_axis=0):
367365
# Add time values inline (they're small)
368366
self.store_dict[f"{time_dim_name}/0"] = time_values.tobytes()
369367

370-
# Debug info
371-
print(f"Created time coordinate '{time_dim_name}' with {time_dim} values")
372-
print(f"Time units: {units}")
373-
if time_dim > 0:
374-
print(f"First timestamp: {time_values[0]} seconds since 1970-01-01, Last: {time_values[-1]}")
375-
376368
def _get_chunk_coords(self, idx, chunks_per_dim):
377369
"""Convert linear chunk index to multidimensional coordinates
378370

0 commit comments

Comments
 (0)