Merged
44 commits
fd2c217
Update README
VeckoTheGecko Mar 25, 2026
b75d0b3
Update gitignore
VeckoTheGecko Mar 25, 2026
295cb7b
Add Parcels as submodule
VeckoTheGecko Mar 25, 2026
98855a5
Add sandbox environment
VeckoTheGecko Mar 25, 2026
bb967c6
Rename benchmarks.json to datasets.json
VeckoTheGecko Mar 26, 2026
4d59a44
typo
VeckoTheGecko Mar 26, 2026
1f8777c
update function name
VeckoTheGecko Mar 26, 2026
96b3646
No need to have this default here
VeckoTheGecko Mar 26, 2026
c56cf69
Add pydantic
VeckoTheGecko Mar 26, 2026
2cec8b4
Migrate to pydantic
VeckoTheGecko Mar 26, 2026
64fd72c
Assert no duplicate dataset names
VeckoTheGecko Mar 26, 2026
b281743
refactor
VeckoTheGecko Mar 26, 2026
96e24a7
Rename function
VeckoTheGecko Mar 26, 2026
2d83e26
Add download-catalogue option
VeckoTheGecko Mar 26, 2026
4570368
Update catalogue
VeckoTheGecko Mar 26, 2026
9fa1963
Move files and add tasks
VeckoTheGecko Mar 26, 2026
bddcbdc
Add catalogue
VeckoTheGecko Mar 26, 2026
99cf82d
Move file
VeckoTheGecko Mar 26, 2026
070bcf0
Update folder location
VeckoTheGecko Mar 26, 2026
96b97d8
Add PARCELS_BENCHMARKS_DATA_FOLDER env var
VeckoTheGecko Mar 26, 2026
98bb9e7
Use curl instead
VeckoTheGecko Mar 26, 2026
42d7f53
Rename files (catalogue to catalog and yaml->yml)
VeckoTheGecko Mar 26, 2026
fc37186
Add task descriptions
VeckoTheGecko Mar 27, 2026
e582cff
Update readme
VeckoTheGecko Mar 27, 2026
2a064be
Remove parcels_benchmarks internal package
VeckoTheGecko Mar 27, 2026
c3299ba
Update script to unpack zips correctly
VeckoTheGecko Mar 27, 2026
0f3a93a
Update toml and lock
VeckoTheGecko Mar 27, 2026
9a25838
Add comment
VeckoTheGecko Mar 27, 2026
0c2cd03
Update catalogs regardless of folder existing
VeckoTheGecko Mar 27, 2026
0c2b6cf
Fix catalogues
VeckoTheGecko Mar 27, 2026
c2eb313
Migrate fesom ingestion to intake
VeckoTheGecko Mar 27, 2026
b4da946
Update MOI
VeckoTheGecko Mar 27, 2026
6fb1b7f
Update ASV conf
VeckoTheGecko Mar 27, 2026
515b767
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 27, 2026
4365e7b
Default PARCELS_BENCHMARKS_DATA_FOLDER to ./data
VeckoTheGecko Apr 2, 2026
5c98107
Clean out dependencies
VeckoTheGecko Apr 2, 2026
1a06ac3
Fix ASV/py-rattler deps
VeckoTheGecko Apr 2, 2026
d46f4b1
run pre-commit
VeckoTheGecko Apr 2, 2026
b1d3bc5
Rename asv.conf.json to .jsonc (comment supported format)
VeckoTheGecko Apr 2, 2026
0675afe
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 2, 2026
f992e3a
Review feedback
VeckoTheGecko Apr 2, 2026
10f4c96
update readme
VeckoTheGecko Apr 2, 2026
cbbc180
Disable isort
VeckoTheGecko Apr 2, 2026
c116a3c
Update catalogs/parcels-benchmarks/catalog.yml
VeckoTheGecko Apr 2, 2026
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ credentials.json
*.egg-info
__pycache__
build/
parcels/
.asv/
html/
.DS_Store

data
.env
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "Parcels"]
path = Parcels
url = git@github.com:Parcels-code/Parcels
1 change: 1 addition & 0 deletions Parcels
Submodule Parcels added at c6f11d
33 changes: 22 additions & 11 deletions README.md
Expand Up @@ -6,15 +6,30 @@ This repository houses performance benchmarks for [Parcels](https://github.com/O

## Development instructions

This project uses a combination of [Pixi](https://pixi.sh/dev/installation/), [ASV](https://asv.readthedocs.io/), and [intake-xarray](https://github.com/intake/intake-xarray) to coordinate the setting up and running of benchmarks.

- Scripts are used to download the datasets required into the correct location
- intake-xarray is used to define data catalogues which can be easily accessed from within benchmark scripts
- ASV is used to run the benchmarks (see the [Writing the benchmarks](#writing-the-benchmarks) section).
- Pixi is used to orchestrate all the above into a convenient, user friendly workflow

You can run `pixi task list` to see the list of available tasks in the workspace.

In brief, you can set up the data and run the benchmarks by doing:

- [install Pixi](https://pixi.sh/dev/installation/) `curl -fsSL https://pixi.sh/install.sh | bash`
- `pixi install`
- `pixi run asv run`
- `PARCELS_BENCHMARKS_DATA_FOLDER=./data pixi run benchmarks`

You can run the linting with `pixi run lint`
> [!NOTE]
> The syntax `PARCELS_BENCHMARKS_DATA_FOLDER=./data pixi run ...` sets the environment variable for that task invocation only, but you can set environment variables [in other ways](https://askubuntu.com/a/58828) as well.
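
For example, both of the following achieve this (assuming a POSIX shell; `sh -c 'echo …'` stands in here for the pixi task so the snippet is self-contained):

```shell
# One-off: the variable is set only for this single command
PARCELS_BENCHMARKS_DATA_FOLDER=./data sh -c 'echo "$PARCELS_BENCHMARKS_DATA_FOLDER"'

# Session-wide: exported for every subsequent command in this shell
export PARCELS_BENCHMARKS_DATA_FOLDER=./data
sh -c 'echo "$PARCELS_BENCHMARKS_DATA_FOLDER"'
```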

> [!IMPORTANT]
> The default path for the benchmark data is set by [pooch.os_cache](https://www.fatiando.org/pooch/latest/api/generated/pooch.os_cache.html), which typically is a subdirectory of your home directory. Currently, you will need at least 50GB of disk space available to store the benchmark data.
> To change the location of the benchmark data cache, you can set the environment variable `PARCELS_DATADIR` to a preferred location to store the benchmark data.
> Currently, you will need at least 50GB of disk space available to store the unzipped benchmark data. Since the zip archives are only deleted after being downloaded and extracted, you will temporarily need about 80GB of disk space in total.
> You need to state explicitly where the benchmark data will be saved by
> setting the `PARCELS_BENCHMARKS_DATA_FOLDER` environment variable. This
> environment variable is used both when downloading the data and in the
> definition of the benchmarks.

To view the benchmark data

Expand All @@ -34,7 +49,7 @@ Members of the Parcels community can contribute benchmark data using the following
2. Clone your fork onto your system

```
git clone git@github.com:<your-github-handle>/parcels-benchmarks.git ~/parcels-benchmarks
git clone --recurse-submodules git@github.com:<your-github-handle>/parcels-benchmarks.git
```

3. Run the benchmarks
Expand All @@ -61,13 +76,9 @@ Adding benchmarks for parcels typically involves adding a dataset and defining t
### Adding new data

Data is hosted remotely on a SurfDrive managed by the Parcels developers. You will need to open an issue on this repository to start the process of getting your data hosted in the shared SurfDrive.
Once your data is hosted in the shared SurfDrive, you can easily add your dataset to the benchmark dataset manifest using

```
pixi run benchmark-setup pixi add-dataset --name "Name for your dataset" --file "Path to ZIP archive in the SurfDrive"
```
Once your data is hosted in the shared SurfDrive, you can easily add your dataset to the benchmark dataset catalogue by modifying `catalogs/parcels-benchmarks/catalog.yml`.

During this process, the dataset will be downloaded and a complete entry will be added to the [parcels_benchmarks/benchmarks.json](./parcels_benchmarks/benchmarks.json) manifest file. Once updated, this file can be committed to this repository and contributed via a pull request.
You can then use this catalogue entry from within your benchmark.
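
A sketch of what using such an entry from a benchmark might look like (the entry name and folder layout are taken from this PR; the intake calls are shown as comments since they require the data to be downloaded first):

```python
import os
from pathlib import Path

# Folder layout assumed from this PR's download tasks: the benchmarks catalog
# lives under <data folder>/surf-data/parcels-benchmarks/catalog.yml
data_folder = Path(os.environ.get("PARCELS_BENCHMARKS_DATA_FOLDER", "./data"))
catalog_path = data_folder / "surf-data" / "parcels-benchmarks" / "catalog.yml"

# Inside a benchmark, the catalogue entry is then opened via intake, e.g.:
#   import intake
#   cat = intake.open_catalog(str(catalog_path))
#   ds = cat["GlobCurrent_example_data"].to_dask()  # lazily-loaded xarray Dataset
print(catalog_path)
```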

### Writing the benchmarks

Expand Down
25 changes: 0 additions & 25 deletions asv.conf.json

This file was deleted.

31 changes: 31 additions & 0 deletions asv.conf.jsonc
@@ -0,0 +1,31 @@
{
    "version": 1,
    "project": "parcels",
    "project_url": "https://github.com/Parcels-Code/parcels",
    "repo": "./Parcels",
    "dvcs": "git",
    "branches": ["main"],
    "environment_type": "rattler",
    "conda_channels": [
        "conda-forge",
        "defaults",
        "https://repo.prefix.dev/parcels",
    ],
    "default_benchmark_timeout": 1800,
    "env_dir": ".asv/env",
    "results_dir": "results",
    "html_dir": "html",
    "build_command": ["python -m build --wheel -o {build_cache_dir} {build_dir}"],
    // "install_command": [
    //     "in-dir={conf_dir} python -m pip install .",
    //     "in-dir={build_dir} python -m pip install ."
    // ],
    // "uninstall_command": [
    //     "return-code=any python -m pip uninstall -y parcels parcels_benchmarks"
    // ]
    "matrix": {
        "req": {
            "intake-xarray": [],
        },
    },
}
27 changes: 27 additions & 0 deletions benchmarks/__init__.py
@@ -0,0 +1,27 @@
import logging
import os
from pathlib import Path

logger = logging.getLogger(__name__)

PIXI_PROJECT_ROOT = os.environ.get("PIXI_PROJECT_ROOT")
if PIXI_PROJECT_ROOT is not None:
    PIXI_PROJECT_ROOT = Path(PIXI_PROJECT_ROOT)

PIXI_PROJECT_ROOT: Path | None

try:
    PARCELS_BENCHMARKS_DATA_FOLDER = Path(os.environ["PARCELS_BENCHMARKS_DATA_FOLDER"])
except KeyError:
    # Default to `./data`
    PARCELS_BENCHMARKS_DATA_FOLDER = Path("./data")
    logger.info("PARCELS_BENCHMARKS_DATA_FOLDER was not set. Defaulting to `./data`")

if not PARCELS_BENCHMARKS_DATA_FOLDER.is_absolute():
    if PIXI_PROJECT_ROOT is None:
        raise RuntimeError(
            "PARCELS_BENCHMARKS_DATA_FOLDER is a relative path, but PIXI_PROJECT_ROOT env variable is not set. We don't know where to store the data."
        )
    PARCELS_BENCHMARKS_DATA_FOLDER = PIXI_PROJECT_ROOT / str(
        PARCELS_BENCHMARKS_DATA_FOLDER
    )
12 changes: 12 additions & 0 deletions benchmarks/catalogs.py
@@ -0,0 +1,12 @@
import intake

from . import PARCELS_BENCHMARKS_DATA_FOLDER


class Catalogs:
    CAT_EXAMPLES = intake.open_catalog(
        f"{PARCELS_BENCHMARKS_DATA_FOLDER}/surf-data/parcels-examples/catalog.yml"
    )
    CAT_BENCHMARKS = intake.open_catalog(
        f"{PARCELS_BENCHMARKS_DATA_FOLDER}/surf-data/parcels-benchmarks/catalog.yml"
    )
28 changes: 14 additions & 14 deletions benchmarks/fesom2.py
@@ -1,5 +1,6 @@
import numpy as np
import uxarray as ux
import xarray as xr
from parcels import (
FieldSet,
Particle,
Expand All @@ -8,39 +9,38 @@
)
from parcels.kernels import AdvectionRK2_3D

from parcels_benchmarks.benchmark_setup import PARCELS_DATADIR, download_example_dataset
from . import PARCELS_BENCHMARKS_DATA_FOLDER

runtime = np.timedelta64(1, "D")
dt = np.timedelta64(2400, "s")


def _load_ds(datapath):
def _load_ds():
"""Helper function to load the FESOM benchmark data as a uxarray dataset"""

grid_file = f"{datapath}/mesh/fesom.mesh.diag.nc"
data_files = f"{datapath}/*.nc"
return ux.open_mfdataset(grid_file, data_files, combine="by_coords")
base = (
f"{PARCELS_BENCHMARKS_DATA_FOLDER}/surf-data/parcels-benchmarks/data/"
"Parcelsv4_Benchmarking_data/Parcels_Benchmarks_FESOM-baroclinic-gyre/data"
)
grid_file = xr.open_mfdataset(f"{base}/mesh/fesom.mesh.diag.nc")
data_files = xr.open_mfdataset(f"{base}/*.nc")

grid = ux.open_grid(grid_file)
return ux.UxDataset(data_files, uxgrid=grid)


class FESOM2:
params = ([10000], [AdvectionRK2_3D])
param_names = ["npart", "integrator"]

def setup(self, npart, integrator):
# Ensure the dataset is downloaded in the desired data_home
# and obtain the path to the dataset
self.datapath = download_example_dataset(
"FESOM-baroclinic-gyre", data_home=PARCELS_DATADIR
)

def time_load_data(self, npart, integrator):
ds = _load_ds(self.datapath)
ds = _load_ds()
for i in range(min(ds.coords["time"].size, 2)):
_u = ds["u"].isel(time=i).compute()
_v = ds["v"].isel(time=i).compute()

def pset_execute(self, npart, integrator):
ds = _load_ds(self.datapath)
ds = _load_ds()
ds = convert.fesom_to_ugrid(ds)
fieldset = FieldSet.from_ugrid_conventions(ds)

Expand Down
12 changes: 7 additions & 5 deletions benchmarks/moi_curvilinear.py
Expand Up @@ -6,12 +6,16 @@
import xgcm
from parcels.interpolators import XLinear

from parcels_benchmarks.benchmark_setup import PARCELS_DATADIR, download_example_dataset

runtime = np.timedelta64(2, "D")
dt = np.timedelta64(15, "m")


PARCELS_DATADIR = ... # TODO: Replace with intake


def download_dataset(*args, **kwargs): ... # TODO: Replace with intake


def _load_ds(datapath, chunk):
"""Helper function to load xarray dataset from datapath with or without chunking"""

Expand Down Expand Up @@ -72,9 +76,7 @@ class MOICurvilinear:
]

def setup(self, interpolator, chunk, npart):
self.datapath = download_example_dataset(
"MOi-curvilinear", data_home=PARCELS_DATADIR
)
self.datapath = download_dataset("MOi-curvilinear", data_home=PARCELS_DATADIR)

def time_load_data_3d(self, interpolator, chunk, npart):
"""Benchmark that times loading the 'U' and 'V' data arrays only for 3-D"""
Expand Down
63 changes: 63 additions & 0 deletions catalogs/parcels-benchmarks/catalog.yml
Member: What's the difference between the catalogues in parcels-benchmarks and the parcels-examples? They seem to be the same now?

Contributor Author: Yes, to be updated in a future PR (mainly focussing on the actual downloading of the datasets - will fix the catalogs and ingestion at the same time)

@@ -0,0 +1,63 @@
# zip_url: https://surfdrive.surf.nl/index.php/s/7xlfdOFaUGDEmpD/download?path=%2F&files=
# ^ Do not remove this line! Used by the download script to find the data source
plugins:
  source:
    - module: intake_xarray
sources: #!TODO Update
  croco:
    description: CROCO_idealized
    driver: netcdf
    #cache:
    #  - argkey: urlpath
    #    regex: ''
    #    type: file
    args:
      urlpath: "{{ CATALOG_DIR }}/data/CROCOidealized_data/CROCO_idealized.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
  GlobCurrent_example_data:
    description: GlobCurrent_example_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/GlobCurrent_example_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
  MITgcm_example_data:
    description: MITgcm_example_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/MITgcm_example_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
  MovingEddies_data:
    description: MovingEddies_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/MovingEddies_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"

  # NemoCurvilinear_data:
  # NemoNorthSeaORCA025-N006_data:
  # OFAM_example_data
  # Peninsula_data
  SWASH_data:
    description: SWASH_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/SWASH_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
  WOA_data:
    description: WOA_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/WOA_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
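
The `# zip_url:` comment on the first line of each catalog is the hook the download script uses to locate the archive on SurfDrive. A minimal sketch of extracting it (the parsing approach is an assumption; the actual download script is not shown in this diff):

```python
def read_zip_url(catalog_text: str) -> str:
    """Pull the data-source URL out of the `# zip_url: ...` marker that the
    catalog files carry on their first line."""
    first_line = catalog_text.splitlines()[0]
    prefix = "# zip_url:"
    if not first_line.startswith(prefix):
        raise ValueError("catalog file does not start with a zip_url marker")
    return first_line[len(prefix):].strip()


sample = (
    "# zip_url: https://surfdrive.surf.nl/index.php/s/7xlfdOFaUGDEmpD/download?path=%2F&files=\n"
    "plugins:\n"
)
print(read_zip_url(sample))
```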
64 changes: 64 additions & 0 deletions catalogs/parcels-examples/catalog.yml
@@ -0,0 +1,64 @@
# zip_url: https://surfdrive.surf.nl/index.php/s/cmdSy8wBtCLDaGJ/download?path=%2F&files=
# ^ Do not remove this line! Used by the download script to find the data source
plugins:
  source:
    - module: intake_xarray
sources:
  croco:
    description: CROCO_idealized
    driver: netcdf
    #cache:
    #  - argkey: urlpath
    #    regex: ''
    #    type: file
    args:
      urlpath: "{{ CATALOG_DIR }}/data/CROCOidealized_data/CROCO_idealized.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
  GlobCurrent_example_data:
    description: GlobCurrent_example_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/GlobCurrent_example_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
  MITgcm_example_data:
    description: MITgcm_example_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/MITgcm_example_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
  MovingEddies_data:
    description: MovingEddies_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/MovingEddies_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"

  # NemoCurvilinear_data:
  # NemoNorthSeaORCA025-N006_data:
  # OFAM_example_data
  # Peninsula_data
  # SWASH_data
  SWASH_data:
    description: SWASH_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/SWASH_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
  WOA_data:
    description: WOA_data
    driver: netcdf
    args:
      urlpath: "{{ CATALOG_DIR }}/data/WOA_data/*.nc"
      chunks: {}
      xarray_kwargs:
        engine: "netcdf4"
1 change: 0 additions & 1 deletion parcels_benchmarks/__init__.py

This file was deleted.
