def ensure_time_first(self):
    """
    Make `time` the leading dimension of every variable that uses it.

    CMIP tables do not always list `time` first (e.g. `longitude latitude time`),
    but CMIP6 NetCDF files follow the NetCDF convention of placing the
    unlimited record dimension first: `(time, [vertical], lat, lon)`.

    Every variable in the dataset (data variables and coordinates alike) whose
    dimensions include `time` somewhere other than the first position is
    transposed so that `time` leads; the relative order of its remaining
    dimensions is preserved. Variables without a `time` dimension, and
    variables that already lead with `time`, are not reassigned.

    If `time` is still not the first entry of the dataset's dimension mapping
    afterwards, the dataset is rebuilt with `time` and `time_bnds` (when
    present) placed ahead of the other variables. The rebuilt dataset keeps
    the original global attributes, dataset-level encoding and the set of
    coordinate names.

    Does nothing when the dataset has no `time` dimension. Updates `self.ds`
    in place and returns None.
    """
    if "time" not in self.ds.dims:
        return

    # Snapshot the names: the loop assigns back into the dataset, so the live
    # variables mapping must not be iterated directly.
    for name in list(self.ds.variables):
        variable = self.ds[name]
        dims = list(variable.dims)
        if "time" in dims and dims[0] != "time":
            new_order = ["time", *(d for d in dims if d != "time")]
            self.ds[name] = variable.transpose(*new_order)

    # Dataset-level dimension order follows the order in which variables
    # introduce their dimensions; nothing more to do if time already leads.
    if next(iter(self.ds.sizes), None) == "time":
        return

    original = self.ds
    leading = [n for n in ("time", "time_bnds") if n in original.variables]
    trailing = [n for n in original.variables if n not in leading]
    ordered_vars = {n: original[n] for n in leading + trailing}

    rebuilt = xr.Dataset(ordered_vars, attrs=original.attrs)
    # Everything was passed through the data-variable mapping, so restore
    # coordinate status for any original coordinate that was demoted.
    rebuilt = rebuilt.set_coords(list(original.coords))
    # A freshly constructed Dataset starts with empty encoding; carry over
    # dataset-level settings (e.g. unlimited_dims) so the rebuild does not
    # change how the file is written.
    rebuilt.encoding = dict(original.encoding)
    self.ds = rebuilt