Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions src/access_moppy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,41 @@ def ordered(ds, core=("lat", "lon", "time", "height")):

self.ds = ordered(self.ds)

def ensure_time_first(self):
"""
Ensure `time` is the first dimension in all CMORised data variables.

CMIP tables do not always list `time` first (e.g. `longitude latitude time`),
but actual CMIP6 NetCDF files follow the NetCDF convention of placing the
unlimited record dimension first: `(time, [vertical], lat, lon)`.

This method transposes any data variable whose leading dimension is not
`time` while preserving the relative order of the remaining dimensions.
Coordinate variables (1-D coords, bounds, scalar coords) are left untouched.
"""
if "time" not in self.ds.dims:
return

for var in list(self.ds.variables):
dims = list(self.ds[var].dims)
if "time" in dims and dims[0] != "time":
new_order = ["time"] + [d for d in dims if d != "time"]
self.ds[var] = self.ds[var].transpose(*new_order)

current_sizes = list(self.ds.sizes.keys())
if current_sizes and current_sizes[0] == "time":
return

ordered_vars = {}
for name in ("time", "time_bnds"):
if name in self.ds.variables:
ordered_vars[name] = self.ds[name]
for name in self.ds.variables:
if name not in ordered_vars:
ordered_vars[name] = self.ds[name]

self.ds = xr.Dataset(ordered_vars, attrs=self.ds.attrs)

def _build_drs_path(self, attrs: Dict[str, str]) -> Path:
"""
Build DRS path using the vocabulary class's controlled vocabulary specifications.
Expand Down Expand Up @@ -1259,6 +1294,8 @@ def run(self, write_output: bool = False):
self.standardize_missing_values()
self.update_attributes()
self.reorder()
# Ensure time is the leading dimension, matching NetCDF/CMIP6 conventions
self.ensure_time_first()
# Final rechunking before writing for optimal I/O performance
if write_output:
self.rechunk_dataset()
Expand Down