From 6234d9f7a75b782c697f66e7ec5b2bc7d1073d59 Mon Sep 17 00:00:00 2001
From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com>
Date: Wed, 11 Feb 2026 14:17:52 +1100
Subject: [PATCH 1/8] keep experiment-generator and experiment-runner as an
 extra bundle

---
 pyproject.toml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 40667a9..cfb2dff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,6 +64,11 @@ addopts = [
 
 testpaths = ["tests"]
 
+access = [
+  "experiment-generator",
+  "experiment-runner",
+]
+
 [tool.coverage.run]
 
 [tool.ruff]

From 82c7415acaf96eb949478c5218c32db912b999d8 Mon Sep 17 00:00:00 2001
From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com>
Date: Thu, 12 Feb 2026 14:43:42 +1100
Subject: [PATCH 2/8] Remove the optional access section & add an interactive
 section

---
 pyproject.toml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index cfb2dff..40667a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,11 +64,6 @@ addopts = [
 
 testpaths = ["tests"]
 
-access = [
-  "experiment-generator",
-  "experiment-runner",
-]
-
 [tool.coverage.run]
 
 [tool.ruff]

From 2e4bb8c2b490259fa1669f8c692ea04ff0ffa19d Mon Sep 17 00:00:00 2001
From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:45:42 +1100
Subject: [PATCH 3/8] Rename extract_index_list to extract_index_list_from_str

---
 src/access/esmf_trace/batch_runs.py | 4 ++--
 src/access/esmf_trace/utils.py      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/access/esmf_trace/batch_runs.py b/src/access/esmf_trace/batch_runs.py
index bb8bf19..a80fe6b 100644
--- a/src/access/esmf_trace/batch_runs.py
+++ b/src/access/esmf_trace/batch_runs.py
@@ -6,7 +6,7 @@
 
 from .config import ConfigError, DefaultSettings, RunSettings
 from .run import run as single_run
-from .utils import extract_index_list, output_name_to_index
+from .utils import extract_index_list_from_str, output_name_to_index
 
 
 def _find_traceout_dir(output_dir: Path, stream_prefix: str) -> Path | None:
@@ -29,7 +29,7 @@ def _gather_outputs(archive_dir: Path, output_index: str | None) -> list[Path]:
     all_outputs = [p for p in archive_dir.glob("output*") if p.is_dir()]
     all_outputs = [p for p in all_outputs if output_name_to_index(p) is not None]
     output_dirs = sorted(all_outputs, key=output_name_to_index)
-    selected = extract_index_list(output_index)
+    selected = extract_index_list_from_str(output_index)
     if selected is not None:
         sel = set(selected)
         present = {output_name_to_index(p) for p in output_dirs}
diff --git a/src/access/esmf_trace/utils.py b/src/access/esmf_trace/utils.py
index de68e40..03df9c7 100644
--- a/src/access/esmf_trace/utils.py
+++ b/src/access/esmf_trace/utils.py
@@ -19,7 +19,7 @@ def output_dir_to_index(p: Path) -> int | None:
     return output_name_to_index(p.name)
 
 
-def extract_index_list(s: str | None) -> list[int] | None:
+def extract_index_list_from_str(s: str | None) -> list[int] | None:
     """
     Parse '0,2-4,9' -> [0,2,3,4,9]
     """

From cb12246877acd0ae1273dab2543dd8f17446cf6a Mon Sep 17 00:00:00 2001
From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:46:15 +1100
Subject: [PATCH 4/8] Fix Path symlink

---
 src/access/esmf_trace/ctf_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/access/esmf_trace/ctf_parser.py b/src/access/esmf_trace/ctf_parser.py
index 0a36a21..b76bca3 100644
--- a/src/access/esmf_trace/ctf_parser.py
+++ b/src/access/esmf_trace/ctf_parser.py
@@ -98,9 +98,9 @@ def open_selected_streams(traceout_path: Path, stream_paths: iter):
     tmpdir = Path(tempfile.mkdtemp(prefix="ctf_stage_")).resolve()
     try:
         # link metadata and the selected streams into the temp bundle
-        Path.symlink(meta, tmpdir / "metadata", target_is_directory=False)
+        (tmpdir / "metadata").symlink_to(meta)
         for s in streams:
-            Path.symlink(s, tmpdir / s.name, target_is_directory=False)
+            (tmpdir / s.name).symlink_to(s)
 
         yield bt2.TraceCollectionMessageIterator(str(tmpdir))
     finally:

From 094580fce417d1b116f0993853a5d9cb4aa5243c Mon Sep 17 00:00:00 2001
From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:48:50 +1100
Subject: [PATCH 5/8] Refactor postsummary and postrun from postprocess.py to
 config.py and refactor cli with library input

---
 src/access/esmf_trace/common_vars.py |  34 ++++
 src/access/esmf_trace/config.py      | 231 ++++++++++++++++++++-------
 src/access/esmf_trace/library.py     |  55 +++++++
 src/access/esmf_trace/main.py        | 135 +++++++++-------
 src/access/esmf_trace/postprocess.py | 172 ++++++--------------
 5 files changed, 387 insertions(+), 240 deletions(-)
 create mode 100644 src/access/esmf_trace/library.py

diff --git a/src/access/esmf_trace/common_vars.py b/src/access/esmf_trace/common_vars.py
index b99e818..73d8bf8 100644
--- a/src/access/esmf_trace/common_vars.py
+++ b/src/access/esmf_trace/common_vars.py
@@ -1 +1,35 @@
+from typing import Literal
+
+
 seconds_to_nanoseconds = 1e9
+
+# For now, two config kinds: "run" and "post-summary" are included.
+# This might be extended if we want to support more config kinds.
+config_kind = Literal["run", "post-summary"]
+
+# Setting keys that the CLI can override, grouped per config kind (run flags first)
+RUN_DEFAULT_FLAG_KEYS = [
+    "merge_adjacent",
+    "xaxis_datetime",
+    "separate_plots",
+    "show_html",
+]
+
+RUN_DEFAULT_KEYS = [
+    "stream_prefix",
+    "model_component",
+    "max_depth",
+    "merge_gap_ns",
+    "cmap",
+    "renderer",
+    "max_workers",
+]
+
+POST_SUMMARY_DEFAULT_KEYS = [
+    "timeseries_suffix",
+    "save_json_path",
+    "stats_start_index",
+    "stats_end_index",
+    "pets",
+    "model_component",
+]
diff --git a/src/access/esmf_trace/config.py b/src/access/esmf_trace/config.py
index 40f294a..ba8f29c 100644
--- a/src/access/esmf_trace/config.py
+++ b/src/access/esmf_trace/config.py
@@ -1,5 +1,9 @@
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Literal, overload
+from .tmp_yaml_parser import read_yaml
+from .utils import extract_index_list_from_str, extract_pets
+from .common_vars import config_kind
 
 
 class ConfigError(Exception):
@@ -84,66 +88,185 @@ def to_job_kwargs(
         }
 
 
-def _require_key(d: dict, keys: list[str]) -> str:
+@dataclass(frozen=True)
+class PostSummarySettings:  # typed form of `default_settings` in a post-summary config
+    post_base_path: Path  # the only field without a default
+    model_component: list[str] | None = None
+    pets: list[int] | None = None
+    stats_start_index: int | None = None
+    stats_end_index: int | None = None
+    timeseries_suffix: str = "_timeseries.json"
+    save_json_path: Path | None = None
+
+
+@dataclass(frozen=True)
+class PostRunSettings:  # typed form of one `runs` entry in a post-summary config
+    name: str
+    output_index: list[int] | None = None  # ints: the loader converts entries with int()
+    model_component: list[str] | None = None
+    pets: list[int] | None = None
+    stats_start_index: int | None = None
+    stats_end_index: int | None = None
+    save_json_path: Path | None = None
+
+
+def _as_mapping(x, what: str) -> dict:
+    if isinstance(x, dict):  # already a mapping: hand the same object back
+        return x
+    raise ConfigError(f"{what} must be a mapping (dict)")
+
+
+def _as_list(x, what: str) -> list:
+    if isinstance(x, list):  # already a list: hand the same object back
+        return x
+    raise ConfigError(f"{what} must be a list")
+
+
+def _require_keys(d: dict, keys: list[str], where: str) -> None:
     missing = [k for k in keys if k not in d]
     if missing:
-        raise ConfigError(f"missing required config key(s): {', '.join(missing)}")
-
-
-def _parse_defaults(d: dict) -> DefaultSettings:
-    return DefaultSettings(
-        post_base_path=d.get("post_base_path"),
-        stream_prefix=d.get("stream_prefix", "esmf_stream"),
-        model_component=d.get("model_component", "[ESMF]/[ensemble] RunPhase1/[ESM0001] RunPhase1"),
-        max_workers=d.get("max_workers"),
-        xaxis_datetime=bool(d.get("xaxis_datetime", False)),
-        separate_plots=bool(d.get("separate_plots", False)),
-        cmap=d.get("cmap", "tab10"),
-        renderer=d.get("renderer", "browser"),
-        show_html=bool(d.get("show_html", False)),
-        max_depth=int(d.get("max_depth", 6)),
-        merge_adjacent=bool(d.get("merge_adjacent", False)),
-        merge_gap_ns=int(d.get("merge_gap_ns", 1000)),
-    )
-
-
-def _parse_runs(lst: list[dict]) -> list[RunSettings]:
-    runs = []
-    for item in lst:
-        if not isinstance(item, dict):
-            raise ConfigError("Each run must be a mapping (dict)")
-
-        has_exact_path = item.get("exact_path")
-        has_other_parts = item.get("run_base") and item.get("run_name") and item.get("branch")
-        if not has_exact_path and not has_other_parts:
-            raise ConfigError(
-                "Each run must have either 'exact_path' or all of 'run_base', 'run_name', and 'branch' set"
-            )
+        raise ConfigError(f"missing required config key(s) in {where}: {', '.join(missing)}")
+
+
+def _norm_model_component(v: str | list | tuple | set | None) -> list[str] | None:
+    """
+    Turn a model_component setting into a list of non-empty, stripped names.
+
+    A list/tuple/set is taken item by item (each item passed through str());
+    any other value is converted with str() and split on commas.
+    Returns None for None input or when no non-empty name remains.
+    """
+    if v is None:
+        return None
+
+    if isinstance(v, (list, tuple, set)):
+        candidates = (str(item) for item in v)
+    else:
+        # split on commas
+        candidates = str(v).split(",")
+
+    names = [name for name in (c.strip() for c in candidates) if name]
+    return names or None
+
 
-        runs.append(
-            RunSettings(
-                base_prefix=item.get("base_prefix"),
-                post_base_path=item.get("post_base_path"),
-                exact_path=Path(item["exact_path"]) if item.get("exact_path") else None,
-                run_base=Path(item["run_base"]) if item.get("run_base") else None,
-                run_name=item.get("run_name"),
-                branch=item.get("branch"),
-                pets=item.get("pets"),
-                model_component=item.get("model_component"),
-                output_index=item.get("output_index"),
+def _norm_int_or_none(v: int | str | None) -> int | None:
+    # None and the empty string both mean "not set"; anything else goes through int()
+    is_unset = v is None or v == ""
+    return None if is_unset else int(v)
+
+
+def _norm_path_or_none(v: str | Path | None) -> Path | None:
+    # None stays None; any other value becomes a Path with a leading "~" expanded
+    path = Path(v).expanduser() if v is not None else None
+    return path
+
+
+# overloads so type checkers can pick the (defaults, runs) pair type from `kind`
+@overload
+def load_yaml_config(config_path: Path, kind: Literal["run"]) -> tuple[DefaultSettings, list[RunSettings]]: ...
+@overload
+def load_yaml_config(
+    config_path: Path, kind: Literal["post-summary"]
+) -> tuple[PostSummarySettings, list[PostRunSettings]]: ...
+
+
+def load_yaml_config(config_path: Path, kind: config_kind):
+    """
+    Load and validate an esmf-trace yaml configuration file.
+    """
+    config_path = Path(config_path)
+    data = read_yaml(config_path)
+
+    _require_keys(data, ["default_settings", "runs"], where=str(config_path))
+    default = _as_mapping(data["default_settings"], what="default_settings")
+    runs = _as_list(data["runs"], what="runs")
+
+    if kind == "run":
+        defaults = DefaultSettings(
+            post_base_path=default.get("post_base_path"),
+            stream_prefix=default.get("stream_prefix", "esmf_stream"),
+            model_component=default.get("model_component", "[ESMF]/[ensemble] RunPhase1/[ESM0001] RunPhase1"),
+            max_workers=default.get("max_workers"),
+            xaxis_datetime=bool(default.get("xaxis_datetime", False)),
+            separate_plots=bool(default.get("separate_plots", False)),
+            cmap=default.get("cmap", "tab10"),
+            renderer=default.get("renderer", "browser"),
+            show_html=bool(default.get("show_html", False)),
+            max_depth=int(default.get("max_depth", 6)),
+            merge_adjacent=bool(default.get("merge_adjacent", False)),
+            merge_gap_ns=int(default.get("merge_gap_ns", 1000)),
+        )
+
+        run_settings: list[RunSettings] = []
+        for i, item in enumerate(runs):
+            item = _as_mapping(item, what=f"runs[{i}]")
+
+            has_exact_path = item.get("exact_path")
+            has_other_parts = item.get("run_base") and item.get("run_name") and item.get("branch")
+            if not has_exact_path and not has_other_parts:
+                raise ConfigError(
+                    f"Each run must have either 'exact_path' or all of 'run_base', 'run_name', and 'branch' set (error in runs[{i}])"
+                )
+
+            run_settings.append(
+                RunSettings(
+                    base_prefix=item.get("base_prefix"),
+                    post_base_path=item.get("post_base_path"),
+                    exact_path=_norm_path_or_none(item.get("exact_path") if item.get("exact_path") else None),
+                    run_base=_norm_path_or_none(item.get("run_base") if item.get("run_base") else None),
+                    run_name=item.get("run_name"),
+                    branch=item.get("branch"),
+                    archive=item.get("archive", "archive"),
+                    pets=item.get("pets"),
+                    model_component=item.get("model_component"),
+                    output_index=item.get("output_index"),
+                )
             )
+
+        return defaults, run_settings
+
+    if kind == "post-summary":
+        post_base = default.get("post_base_path")
+        if not post_base:
+            raise ConfigError("default_settings.post_base_path is required for post-summary config")
+
+        defaults = PostSummarySettings(
+            post_base_path=Path(post_base).expanduser(),
+            model_component=_norm_model_component(default.get("model_component")),
+            pets=extract_pets(default.get("pets") if default.get("pets") is not None else None),
+            stats_start_index=_norm_int_or_none(default.get("stats_start_index")),
+            stats_end_index=_norm_int_or_none(default.get("stats_end_index")),
+            timeseries_suffix=default.get("timeseries_suffix", "_timeseries.json"),
+            save_json_path=_norm_path_or_none(default.get("save_json_path")),
         )
-    return runs
 
+        post_runs: list[PostRunSettings] = []
+        for i, item in enumerate(runs):
+            item = _as_mapping(item, what=f"runs[{i}]")
+            _require_keys(item, ["name"], where=f"runs[{i}]")
 
-def load_config(input_config: dict) -> (DefaultSettings, list[RunSettings]):
-    _require_key(input_config, ["default_settings", "runs"])
+            oi = item.get("output_index")
+            if isinstance(oi, list):
+                output_index = [int(x) for x in oi]
+            elif isinstance(oi, str):
+                output_index = extract_index_list_from_str(oi)
+            else:
+                output_index = None
 
-    if not isinstance(input_config["default_settings"], dict):
-        raise ConfigError("'default_settings' must be a dict")
-    if not isinstance(input_config["runs"], list):
-        raise ConfigError("'runs' must be a list")
+            pets_input = item.get("pets", defaults.pets)
+            pets = pets_input if isinstance(pets_input, list) or pets_input is None else extract_pets(str(pets_input))
+
+            post_runs.append(
+                PostRunSettings(  # fall back to `defaults` (the settings object), not the raw `default` dict
+                    name=str(item["name"]),
+                    output_index=output_index,
+                    model_component=_norm_model_component(item.get("model_component", defaults.model_component)),
+                    pets=pets,
+                    stats_start_index=_norm_int_or_none(item.get("stats_start_index", defaults.stats_start_index)),
+                    stats_end_index=_norm_int_or_none(item.get("stats_end_index", defaults.stats_end_index)),
+                    save_json_path=_norm_path_or_none(item.get("save_json_path") or None),  # per-run only
+                )
+            )
+        return defaults, post_runs
 
-    defaults = _parse_defaults(input_config["default_settings"])
-    runs = _parse_runs(input_config["runs"])
-    return defaults, runs
+    raise ValueError(f"Invalid config kind: {kind}")
diff --git a/src/access/esmf_trace/library.py b/src/access/esmf_trace/library.py
new file mode 100644
index 0000000..767030f
--- /dev/null
+++ b/src/access/esmf_trace/library.py
@@ -0,0 +1,55 @@
+from dataclasses import replace
+from pathlib import Path
+
+from .batch_runs import run_batch_jobs
+from .config import DefaultSettings, PostSummarySettings, load_yaml_config, RunSettings, PostRunSettings
+from .postprocess import post_summary_from_yaml
+
+
+def run_from_config(
+    config_path: str | Path | dict,
+    run_overrides: dict | None = None,
+):
+    """
+    Run the batch jobs described by a "run" config.
+
+    config_path: yaml file path, or an already-loaded dict with
+    "default_settings" and "runs" entries.
+    run_overrides: optional DefaultSettings field overrides,
+    e.g. {"stream_prefix": "esmf_stream", "max_workers": 8}
+    """
+    if not isinstance(config_path, (str, Path)):
+        # dict input: build the settings objects directly from its entries
+        base = DefaultSettings(**config_path["default_settings"])
+        run_list = [RunSettings(**entry) for entry in config_path["runs"]]
+    else:
+        base, run_list = load_yaml_config(Path(config_path), kind="run")
+
+    # an empty or missing overrides dict leaves the defaults untouched
+    run_batch_jobs(replace(base, **dict(run_overrides)) if run_overrides else base, run_list)
+
+
+def post_summary_from_config(
+    config_path: str | Path | dict,
+    post_overrides: dict | None = None,
+    save_json_path: str | Path | None = None,
+):
+    """
+    Run post_summary_from_yaml for a "post-summary" config.
+
+    config_path: yaml path, or a dict with "default_settings" and "runs".
+    post_overrides: optional PostSummarySettings field overrides,
+    e.g. {"timeseries_suffix": "_timeseries.json", "stats_start_index": 1}
+    save_json_path: forwarded (as str) to post_summary_from_yaml when given.
+    """
+    if not isinstance(config_path, (str, Path)):
+        base = PostSummarySettings(**config_path["default_settings"])
+        cases = [PostRunSettings(**entry) for entry in config_path["runs"]]
+    else:
+        base, cases = load_yaml_config(Path(config_path), kind="post-summary")
+        assert isinstance(base, PostSummarySettings)
+
+    if post_overrides:
+        base = replace(base, **dict(post_overrides))
+    target = None if save_json_path is None else str(save_json_path)
+    post_summary_from_yaml(base, cases, save_json_path=target)
diff --git a/src/access/esmf_trace/main.py b/src/access/esmf_trace/main.py
index 243fc5f..31bab14 100644
--- a/src/access/esmf_trace/main.py
+++ b/src/access/esmf_trace/main.py
@@ -1,18 +1,15 @@
 import argparse
-from dataclasses import replace
 from pathlib import Path
 
-from .batch_runs import run_batch_jobs
-from .config import DefaultSettings, load_config
-from .postprocess import run_post_summary_from_yaml
-from .tmp_yaml_parser import read_yaml
+from .common_vars import RUN_DEFAULT_FLAG_KEYS, RUN_DEFAULT_KEYS, POST_SUMMARY_DEFAULT_KEYS
+from .library import run_from_config, post_summary_from_config
 
 
-def _override_run_args(ns: argparse.Namespace) -> None:
+def _add_run_overrides(parser: argparse.ArgumentParser) -> None:
     """
     Optional overrides from command line args to config settings.
     """
-    arg = ns.add_argument_group("overrides", "Optional overrides to config settings")
+    arg = parser.add_argument_group("overrides", "Optional overrides to config settings")
 
     arg.add_argument(
         "--stream-prefix",
@@ -74,40 +71,56 @@ def _override_run_args(ns: argparse.Namespace) -> None:
     )
 
 
-def _apply_overrides(ns: argparse.Namespace, defaults: DefaultSettings) -> DefaultSettings:
+def _apply_run_overrides(ns: argparse.Namespace) -> dict:
     """
     Apply any command line overrides to the run defaults.
     """
-    updates = {}
+    overrides = {}
 
     # booleans only override when True provided
-    if getattr(ns, "merge_adjacent", False):
-        updates["merge_adjacent"] = True
-    if getattr(ns, "xaxis_datetime", False):
-        updates["xaxis_datetime"] = True
-    if getattr(ns, "separate_plots", False):
-        updates["separate_plots"] = True
-    if getattr(ns, "show_html", False):
-        updates["show_html"] = True
+    for flag in RUN_DEFAULT_FLAG_KEYS:
+        if getattr(ns, flag, False):
+            overrides[flag] = True
 
     # None means no override
-    for f in [
-        "stream_prefix",
-        "model_component",
-        "max_depth",
-        "merge_gap_ns",
-        "cmap",
-        "renderer",
-        "max_workers",
-    ]:
+    for f in RUN_DEFAULT_KEYS:
         v = getattr(ns, f, None)
         if v is not None:
-            updates[f] = v
+            overrides[f] = v
 
-    return replace(defaults, **updates) if updates else defaults
+    return overrides
 
 
-def _add_run_from_yaml_subparser(subparsers) -> None:
+def _add_post_summary_overrides(parser: argparse.ArgumentParser) -> None:
+    """
+    Add optional override arguments for the post-summary-from-yaml command (none of them sets a default).
+    """
+    arg = parser.add_argument_group("overrides", "Optional overrides to config settings")
+
+    arg.add_argument("--model-component", nargs="+", help="Full model_component name(s) to include.")
+    arg.add_argument("--pets", nargs="+", type=int, help="PET index(es) to include.")
+    arg.add_argument("--stats-start-index", type=int, help="Slice start (iloc) per series.")
+    arg.add_argument("--stats-end-index", type=int, help="Slice end (iloc, exclusive) per series.")
+    arg.add_argument(  # no default given, so an omitted option is left as None by argparse
+        "--timeseries-suffix", type=str, help="Timeseries filename suffix to match (e.g., _timeseries.json)."
+    )
+    arg.add_argument("--save-json-path", type=Path, help="Save combined summary JSON to this path.")
+
+
+def _apply_post_summary_overrides(ns: argparse.Namespace) -> dict:
+    """
+    Gather CLI post-summary overrides: None is skipped, a Path save_json_path becomes str.
+    """
+    picked = {}
+    for key in POST_SUMMARY_DEFAULT_KEYS:
+        value = getattr(ns, key, None)
+        if value is None:
+            continue
+        picked[key] = str(value) if key == "save_json_path" and isinstance(value, Path) else value
+    return picked
+
+
+def _add_run_command(subparsers) -> None:
     """
     run-from-yaml:
       Process multiple traceout directories from a yaml config file
@@ -125,12 +138,12 @@ def _add_run_from_yaml_subparser(subparsers) -> None:
     )
 
     # Optional overrides
-    _override_run_args(rs)
+    _add_run_overrides(rs)
 
-    rs.set_defaults(func=run_from_yaml_config)
+    rs.set_defaults(func=cli_run_from_yaml)
 
 
-def _add_post_summary_from_yaml_subparser(subparsers) -> None:
+def _add_post_summary_command(subparsers) -> None:
     """
     post-summary-from-yaml:
       Summarise existing *_timeseries.json files by reading a YAML file that lists:
@@ -150,52 +163,52 @@ def _add_post_summary_from_yaml_subparser(subparsers) -> None:
         help="yaml config file for postprocessing summary",
     )
 
-    arg = ps.add_argument_group("overrides", "Optional overrides to config settings")
-
-    # Optional override
-    arg.add_argument("--model-component", nargs="+", help="Full model_component name(s) to include.")
-    arg.add_argument("--pets", nargs="+", type=int, help="PET index(es) to include.")
-    arg.add_argument("--stats-start-index", type=int, help="Slice start (iloc) per series.")
-    arg.add_argument(
-        "--stats-end-index", type=int, help="Slice end (iloc, exclusive) per series. Default: full length."
-    )
-    arg.add_argument(
-        "--timeseries-suffix",
-        type=str,
-        default="_timeseries.json",
-        help="Timeseries filename suffix to match (default: _timeseries.json).",
-    )
-    arg.add_argument(
-        "--save-json-path", type=Path, help="Save summary to json format file (otherwise prints to stdout)."
-    )
+    # Optional overrides
+    _add_post_summary_overrides(ps)
 
-    ps.set_defaults(func=run_post_summary_from_yaml)
+    ps.set_defaults(func=cli_post_summary_from_yaml)
 
 
-def run_from_yaml_config(
+def cli_run_from_yaml(
     ns: argparse.Namespace,
 ) -> None:
     """
     Run multiple jobs from a yaml config file with optional command line overrides.
     """
-    input_config = read_yaml(ns.config)
-    defaults, runs = load_config(input_config)
-    # overides
-    defaults = _apply_overrides(ns, defaults)
-    run_batch_jobs(defaults, runs)
+    run_from_config(ns.config, run_overrides=_apply_run_overrides(ns))
 
 
-def main():
+def cli_post_summary_from_yaml(
+    ns: argparse.Namespace,
+) -> None:
+    """
+    Summarise existing e.g. *_timeseries.json files by reading a yaml file that lists:
+      - default_settings: { post_base_path, ... }
+      - runs: [{ name: postprocessing_<case>, output_index: [optional list of ints] }, ...]
+    """
+    post_summary_from_config(
+        ns.config,
+        post_overrides=_apply_post_summary_overrides(ns),
+        save_json_path=ns.save_json_path,
+    )
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """
+    Build and return the CLI argument parser.
+    """
     parser = argparse.ArgumentParser(
         prog="esmf-trace",
         description="ESMF traceout analysis and visualisation.",
     )
-
     subparsers = parser.add_subparsers(dest="cmd", required=True)
+    _add_run_command(subparsers)
+    _add_post_summary_command(subparsers)
+    return parser
 
-    _add_run_from_yaml_subparser(subparsers)
-    _add_post_summary_from_yaml_subparser(subparsers)
 
+def main():
+    parser = build_parser()
     args = parser.parse_args()
     args.func(args)
 
diff --git a/src/access/esmf_trace/postprocess.py b/src/access/esmf_trace/postprocess.py
index 790554d..f906ce0 100644
--- a/src/access/esmf_trace/postprocess.py
+++ b/src/access/esmf_trace/postprocess.py
@@ -1,11 +1,12 @@
 import argparse
+from dataclasses import replace
 import json
 from pathlib import Path
 
 import pandas as pd
 
-from .tmp_yaml_parser import read_yaml
-from .utils import extract_pets, output_dir_to_index, output_name_to_index
+from .config import PostSummarySettings, PostRunSettings
+from .utils import output_dir_to_index, output_name_to_index
 
 
 def _load_timeseries_json(p: Path) -> pd.DataFrame:
@@ -85,106 +86,6 @@ def _collect_case_jsons(
     return jsons
 
 
-def _as_list_or_none(v) -> list | None:
-    if v is None:
-        return None
-    if isinstance(v, (list, tuple, set)):
-        return list(v)
-    return [v]
-
-
-def _norm_model_component(v) -> list[str] | None:
-    """
-    Normalise model_component to a list of strings.
-    Accepts a comma-separated str or a list[str].
-    """
-    if v is None:
-        return None
-    if isinstance(v, (list, tuple, set)):
-        parts = [str(x).strip() for x in v if str(x).strip()]
-        return parts or None
-    s = str(v).strip()
-    if not s:
-        return None
-    # split on commas
-    parts = [p.strip() for p in s.split(",") if p.strip()]
-    return parts or None
-
-
-def _norm_pets(v) -> list[int] | None:
-    if v is None:
-        return None
-    if isinstance(v, str):
-        return _as_list_or_none(extract_pets(v))
-    if isinstance(v, (list, tuple, set)):
-        return [int(x) for x in v]
-    return [int(v)]
-
-
-def _norm_end(v):
-    if v is None or v == "":
-        return None
-    return int(v)
-
-
-def load_post_runs_config(config_path: Path) -> tuple[dict, list[dict]]:
-    """
-    Parse 'postprocessing.yaml' with:
-      default_settings:
-      { post_base_path, model_component?, pets?, stats_start_index?, stats_end_index?, timeseries_suffix? }
-      runs: [ { name, output_index?, model_component?, pets?, stats_start_index?, stats_end_index? }, ... ]
-    """
-    data = read_yaml(config_path)
-
-    if "default_settings" not in data or "runs" not in data:
-        raise ValueError("YAML must have 'default_settings' and 'runs' keys.")
-
-    dflt = data["default_settings"]
-    runs = data["runs"]
-
-    if not isinstance(dflt, dict):
-        raise ValueError("'default_settings' must be a mapping.")
-    if not isinstance(runs, list) or not runs:
-        raise ValueError("'runs' must be a non-empty list.")
-
-    post_base_path = dflt.get("post_base_path")
-    if not post_base_path:
-        raise ValueError("'default_settings.post_base_path' is required.")
-
-    defaults = {
-        "post_base_path": Path(post_base_path).expanduser().resolve(),
-        "model_component": _norm_model_component(dflt.get("model_component")),
-        "pets": _norm_pets(dflt.get("pets")),
-        "stats_start_index": (
-            int(dflt.get("stats_start_index")) if dflt.get("stats_start_index") is not None else None
-        ),
-        "stats_end_index": _norm_end(dflt.get("stats_end_index")),
-        "timeseries_suffix": dflt.get("timeseries_suffix", "_timeseries.json"),
-        "save_json_path": (Path(dflt["save_json_path"]).expanduser() if dflt.get("save_json_path") else None),
-    }
-
-    norm_runs: list[dict] = []
-    for r in runs:
-        norm_runs.append(
-            {
-                "name": str(r["name"]),
-                "output_index": ([int(x) for x in r["output_index"]] if r.get("output_index") is not None else None),
-                "model_component": _norm_model_component(r.get("model_component", defaults["model_component"])),
-                "pets": _norm_pets(r.get("pets", defaults["pets"])),
-                "stats_start_index": (
-                    int(r.get("stats_start_index"))
-                    if r.get("stats_start_index") is not None
-                    else defaults["stats_start_index"]
-                ),
-                "stats_end_index": _norm_end(
-                    r.get("stats_end_index") if r.get("stats_end_index") is not None else defaults["stats_end_index"]
-                ),
-                "save_json_path": (Path(r["save_json_path"]).expanduser() if r.get("save_json_path") else None),
-            }
-        )
-    return defaults, norm_runs
-
-
 def _summarise_case(
     json_paths: list[Path],
     model_component: list[str] | None,
@@ -295,8 +196,7 @@ def _summarise_case(
         combined_by_comp["__case_name"] + "/combine/" + combined_by_comp["model_component"].astype(str).str.strip()
     )
 
-    out = pd.concat([per_output[output_cols], combined_by_comp[output_cols]], ignore_index=True)
-    return out
+    return pd.concat([per_output[output_cols], combined_by_comp[output_cols]], ignore_index=True)
 
 
 def _resolve_save_json_path(save_json_path: str | None) -> Path | None:
@@ -309,42 +209,39 @@ def _resolve_save_json_path(save_json_path: str | None) -> Path | None:
     return p
 
 
-def run_post_summary_from_yaml(ns: argparse.Namespace) -> None:
-    """
-    Build per-output rows + a combined row for each selected case (from yaml),
-    then print a combined table.
-    """
-    defaults, runs = load_post_runs_config(Path(ns.config))
-
-    post_base_path: Path = defaults["post_base_path"]
-    timeseries_suffix: str = defaults["timeseries_suffix"]
+def post_summary_from_yaml(
+    defaults: PostSummarySettings,
+    runs: list[PostRunSettings],
+    save_json_path: str | None = None,
+) -> pd.DataFrame:
+    post_base_path: Path = defaults.post_base_path
+    timeseries_suffix: str = defaults.timeseries_suffix
 
     per_case_tables: list[pd.DataFrame] = []
 
-    # process each run (case)
     for r in runs:
-        case_name = r["name"]
+        case_name = r.name
 
         jsons = _collect_case_jsons(
             post_base_path=post_base_path,
             case_name=case_name,
-            output_index=r["output_index"],
+            output_index=r.output_index,
             timeseries_suffix=timeseries_suffix,
         )
 
         case_summary = _summarise_case(
             json_paths=jsons,
-            model_component=r["model_component"],
-            pets=r["pets"],
-            stats_start_index=r["stats_start_index"],
-            stats_end_index=r["stats_end_index"],
+            model_component=r.model_component,
+            pets=r.pets,
+            stats_start_index=r.stats_start_index,
+            stats_end_index=r.stats_end_index,
         )
 
         if case_summary.empty:
             continue
 
         # Save per-run json if this run specified a save path (strict .json)
-        per_run_save = _resolve_save_json_path(r.get("save_json_path"))
+        per_run_save = _resolve_save_json_path(r.save_json_path)
         if per_run_save is not None:
             (
                 case_summary.reset_index(drop=True).to_json(  # ensure a clean row index
@@ -370,10 +267,7 @@ def run_post_summary_from_yaml(ns: argparse.Namespace) -> None:
     print("-- Summary table:")
     print(clean_df)
 
-    # save combined json if requested: cli override, else defaults
-    cli_combined = getattr(ns, "save_json_path", None)
-    default_combined = defaults.get("save_json_path")
-    combined_out = _resolve_save_json_path(cli_combined or default_combined)
+    combined_out = _resolve_save_json_path(save_json_path or defaults.save_json_path)
 
     if combined_out is not None:
         (combined_df.rename(columns={"__row_label": "name"}).to_json(combined_out, orient="records", indent=2))
@@ -383,3 +277,31 @@ def run_post_summary_from_yaml(ns: argparse.Namespace) -> None:
         clean_parquet = combined_out.with_name(combined_out.stem + "_table.parquet")
         clean_df.to_parquet(clean_parquet, index=True)
         print(f"-- saved cleaned table parquet: {clean_parquet}")
+
+
+# def run_post_summary_from_yaml(ns: argparse.Namespace) -> None:
+#     """
+#     cli entrypoint for post-summary config from yaml and run the summary.
+#     """
+#     defaults, runs = load_yaml_config(Path(ns.config), kind="post-summary")
+#     assert isinstance(defaults, PostSummarySettings)
+
+#     # apply overrides to defaults (if any)
+#     overrides = {}
+#     if getattr(ns, "model_component", None) is not None:
+#         overrides["model_component"] = ns.model_component  # list[str]
+#     if getattr(ns, "pets", None) is not None:
+#         overrides["pets"] = ns.pets  # list[int]
+#     if getattr(ns, "stats_start_index", None) is not None:
+#         overrides["stats_start_index"] = ns.stats_start_index  # int
+#     if getattr(ns, "stats_end_index", None) is not None:
+#         overrides["stats_end_index"] = ns.stats_end_index  # int
+#     if getattr(ns, "timeseries_suffix", None) is not None:
+#         overrides["timeseries_suffix"] = ns.timeseries_suffix  # str
+#     if getattr(ns, "save_json_path", None) is not None:
+#         overrides["save_json_path"] = ns.save_json_path  # str
+
+#     if overrides:
+#         defaults = replace(defaults, **overrides)
+
+#     post_summary_from_yaml(defaults, runs, save_json_path=ns.save_json_path)

From a4206ceff5afce02e64078a50a50919061c69937 Mon Sep 17 00:00:00 2001
From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:50:19 +1100
Subject: [PATCH 6/8] Export run_from_config and post_summary_from_config at
 top level

---
 src/access/esmf_trace/__init__.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/access/esmf_trace/__init__.py b/src/access/esmf_trace/__init__.py
index 704f5ca..8c1eef1 100644
--- a/src/access/esmf_trace/__init__.py
+++ b/src/access/esmf_trace/__init__.py
@@ -7,3 +7,13 @@
 
 with suppress(PackageNotFoundError):
     __version__ = version("esmf_trace")
+
+from access.esmf_trace.library import (
+    run_from_config,
+    post_summary_from_config,
+)
+
+__all__ = [
+    "run_from_config",
+    "post_summary_from_config",
+]

From 5bcc5f14aaecbf8da67f822c572a6e8546ca2f98 Mon Sep 17 00:00:00 2001
From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:58:57 +1100
Subject: [PATCH 7/8] ruff check --fix

---
 src/access/esmf_trace/__init__.py    | 2 +-
 src/access/esmf_trace/common_vars.py | 1 -
 src/access/esmf_trace/config.py      | 6 ++++--
 src/access/esmf_trace/library.py     | 2 +-
 src/access/esmf_trace/main.py        | 4 ++--
 src/access/esmf_trace/postprocess.py | 4 +---
 6 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/access/esmf_trace/__init__.py b/src/access/esmf_trace/__init__.py
index 8c1eef1..d1b4846 100644
--- a/src/access/esmf_trace/__init__.py
+++ b/src/access/esmf_trace/__init__.py
@@ -9,8 +9,8 @@
     __version__ = version("esmf_trace")
 
 from access.esmf_trace.library import (
-    run_from_config,
     post_summary_from_config,
+    run_from_config,
 )
 
 __all__ = [
diff --git a/src/access/esmf_trace/common_vars.py b/src/access/esmf_trace/common_vars.py
index 73d8bf8..283ae74 100644
--- a/src/access/esmf_trace/common_vars.py
+++ b/src/access/esmf_trace/common_vars.py
@@ -1,6 +1,5 @@
 from typing import Literal
 
-
 seconds_to_nanoseconds = 1e9
 
 # For now, two config kinds: "run" and "post-summary" are included.
diff --git a/src/access/esmf_trace/config.py b/src/access/esmf_trace/config.py
index ba8f29c..6bc9003 100644
--- a/src/access/esmf_trace/config.py
+++ b/src/access/esmf_trace/config.py
@@ -1,9 +1,10 @@
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Literal, overload
+
+from .common_vars import config_kind
 from .tmp_yaml_parser import read_yaml
 from .utils import extract_index_list_from_str, extract_pets
-from .common_vars import config_kind
 
 
 class ConfigError(Exception):
@@ -205,7 +206,8 @@ def load_yaml_config(config_path: Path, kind: config_kind):
             has_other_parts = item.get("run_base") and item.get("run_name") and item.get("branch")
             if not has_exact_path and not has_other_parts:
                 raise ConfigError(
-                    f"Each run must have either 'exact_path' or all of 'run_base', 'run_name', and 'branch' set (error in runs[{i}])"
+                    "Each run must have either 'exact_path' or "
+                    f"all of 'run_base', 'run_name', and 'branch' set (error in runs[{i}])"
                 )
 
             run_settings.append(
diff --git a/src/access/esmf_trace/library.py b/src/access/esmf_trace/library.py
index 767030f..050fd1d 100644
--- a/src/access/esmf_trace/library.py
+++ b/src/access/esmf_trace/library.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 
 from .batch_runs import run_batch_jobs
-from .config import DefaultSettings, PostSummarySettings, load_yaml_config, RunSettings, PostRunSettings
+from .config import DefaultSettings, PostRunSettings, PostSummarySettings, RunSettings, load_yaml_config
 from .postprocess import post_summary_from_yaml
 
 
diff --git a/src/access/esmf_trace/main.py b/src/access/esmf_trace/main.py
index 31bab14..5963682 100644
--- a/src/access/esmf_trace/main.py
+++ b/src/access/esmf_trace/main.py
@@ -1,8 +1,8 @@
 import argparse
 from pathlib import Path
 
-from .common_vars import RUN_DEFAULT_FLAG_KEYS, RUN_DEFAULT_KEYS, POST_SUMMARY_DEFAULT_KEYS
-from .library import run_from_config, post_summary_from_config
+from .common_vars import POST_SUMMARY_DEFAULT_KEYS, RUN_DEFAULT_FLAG_KEYS, RUN_DEFAULT_KEYS
+from .library import post_summary_from_config, run_from_config
 
 
 def _add_run_overrides(parser: argparse.ArgumentParser) -> None:
diff --git a/src/access/esmf_trace/postprocess.py b/src/access/esmf_trace/postprocess.py
index f906ce0..713f8c6 100644
--- a/src/access/esmf_trace/postprocess.py
+++ b/src/access/esmf_trace/postprocess.py
@@ -1,11 +1,9 @@
-import argparse
-from dataclasses import replace
 import json
 from pathlib import Path
 
 import pandas as pd
 
-from .config import PostSummarySettings, PostRunSettings
+from .config import PostRunSettings, PostSummarySettings
 from .utils import output_dir_to_index, output_name_to_index
 
 

From ce17fcb989fc6c6d0548c08498fc0880f7dc3ccb Mon Sep 17 00:00:00 2001
From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com>
Date: Wed, 11 Feb 2026 16:57:25 +1100
Subject: [PATCH 8/8] Remove unused lines in postprocess.py

---
 src/access/esmf_trace/postprocess.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/src/access/esmf_trace/postprocess.py b/src/access/esmf_trace/postprocess.py
index 713f8c6..bbc9ec9 100644
--- a/src/access/esmf_trace/postprocess.py
+++ b/src/access/esmf_trace/postprocess.py
@@ -275,31 +275,3 @@ def post_summary_from_yaml(
         clean_parquet = combined_out.with_name(combined_out.stem + "_table.parquet")
         clean_df.to_parquet(clean_parquet, index=True)
         print(f"-- saved cleaned table parquet: {clean_parquet}")
-
-
-# def run_post_summary_from_yaml(ns: argparse.Namespace) -> None:
-#     """
-#     cli entrypoint for post-summary config from yaml and run the summary.
-#     """
-#     defaults, runs = load_yaml_config(Path(ns.config), kind="post-summary")
-#     assert isinstance(defaults, PostSummarySettings)
-
-#     # apply overrides to defaults (if any)
-#     overrides = {}
-#     if getattr(ns, "model_component", None) is not None:
-#         overrides["model_component"] = ns.model_component  # list[str]
-#     if getattr(ns, "pets", None) is not None:
-#         overrides["pets"] = ns.pets  # list[int]
-#     if getattr(ns, "stats_start_index", None) is not None:
-#         overrides["stats_start_index"] = ns.stats_start_index  # int
-#     if getattr(ns, "stats_end_index", None) is not None:
-#         overrides["stats_end_index"] = ns.stats_end_index  # int
-#     if getattr(ns, "timeseries_suffix", None) is not None:
-#         overrides["timeseries_suffix"] = ns.timeseries_suffix  # str
-#     if getattr(ns, "save_json_path", None) is not None:
-#         overrides["save_json_path"] = ns.save_json_path  # str
-
-#     if overrides:
-#         defaults = replace(defaults, **overrides)
-
-#     post_summary_from_yaml(defaults, runs, save_json_path=ns.save_json_path)