From 6234d9f7a75b782c697f66e7ec5b2bc7d1073d59 Mon Sep 17 00:00:00 2001 From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com> Date: Wed, 11 Feb 2026 14:17:52 +1100 Subject: [PATCH 1/8] keep experiment-generator and experiment-runner as an extra bundle --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 40667a9..cfb2dff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,11 @@ addopts = [ testpaths = ["tests"] +access = [ + "experiment-generator", + "experiment-runner", +] + [tool.coverage.run] [tool.ruff] From 82c7415acaf96eb949478c5218c32db912b999d8 Mon Sep 17 00:00:00 2001 From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com> Date: Thu, 12 Feb 2026 14:43:42 +1100 Subject: [PATCH 2/8] Remove the optional access section & add an interactive section --- pyproject.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cfb2dff..40667a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,11 +64,6 @@ addopts = [ testpaths = ["tests"] -access = [ - "experiment-generator", - "experiment-runner", -] - [tool.coverage.run] [tool.ruff] From 2e4bb8c2b490259fa1669f8c692ea04ff0ffa19d Mon Sep 17 00:00:00 2001 From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:45:42 +1100 Subject: [PATCH 3/8] Rename extract_index_list to extract_index_list_from_str --- src/access/esmf_trace/batch_runs.py | 4 ++-- src/access/esmf_trace/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/access/esmf_trace/batch_runs.py b/src/access/esmf_trace/batch_runs.py index bb8bf19..a80fe6b 100644 --- a/src/access/esmf_trace/batch_runs.py +++ b/src/access/esmf_trace/batch_runs.py @@ -6,7 +6,7 @@ from .config import ConfigError, DefaultSettings, RunSettings from .run import run as single_run -from .utils import extract_index_list, output_name_to_index +from .utils import extract_index_list_from_str, output_name_to_index def _find_traceout_dir(output_dir: Path, stream_prefix: str) -> Path | None: @@ -29,7 +29,7 @@ def _gather_outputs(archive_dir: Path, output_index: str | None) -> list[Path]: all_outputs = [p for p in archive_dir.glob("output*") if p.is_dir()] all_outputs = [p for p in all_outputs if output_name_to_index(p) is not None] output_dirs = sorted(all_outputs, key=output_name_to_index) - selected = extract_index_list(output_index) + selected = extract_index_list_from_str(output_index) if selected is not None: sel = set(selected) present = {output_name_to_index(p) for p in output_dirs} diff --git a/src/access/esmf_trace/utils.py b/src/access/esmf_trace/utils.py index de68e40..03df9c7 100644 --- a/src/access/esmf_trace/utils.py +++ b/src/access/esmf_trace/utils.py @@ -19,7 +19,7 @@ def output_dir_to_index(p: Path) -> int | None: return output_name_to_index(p.name) -def extract_index_list(s: str | None) -> list[int] | None: +def extract_index_list_from_str(s: str | None) -> list[int] | None: """ Parse '0,2-4,9' -> [0,2,3,4,9] """ From cb12246877acd0ae1273dab2543dd8f17446cf6a Mon Sep 17 00:00:00 2001 From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:46:15 +1100 Subject: [PATCH 4/8] Fix Path symlink --- src/access/esmf_trace/ctf_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/access/esmf_trace/ctf_parser.py b/src/access/esmf_trace/ctf_parser.py index 0a36a21..b76bca3 100644 --- a/src/access/esmf_trace/ctf_parser.py +++ b/src/access/esmf_trace/ctf_parser.py @@ -98,9 +98,9 @@ def open_selected_streams(traceout_path: Path, stream_paths: iter): tmpdir = Path(tempfile.mkdtemp(prefix="ctf_stage_")).resolve() try: # link metadata and the selected streams into the temp bundle - Path.symlink(meta, tmpdir / "metadata", target_is_directory=False) + (tmpdir / "metadata").symlink_to(meta) for s in streams: - Path.symlink(s, tmpdir / s.name, target_is_directory=False) + (tmpdir / s.name).symlink_to(s) yield bt2.TraceCollectionMessageIterator(str(tmpdir)) finally: From 094580fce417d1b116f0993853a5d9cb4aa5243c Mon Sep 17 00:00:00 2001 From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:48:50 +1100 Subject: [PATCH 5/8] Refactor postsummary and postrun from postprocess.py to config.py and refactor cli with library input --- src/access/esmf_trace/common_vars.py | 34 ++++ src/access/esmf_trace/config.py | 231 ++++++++++++++++++++------- src/access/esmf_trace/library.py | 55 +++++++ src/access/esmf_trace/main.py | 135 +++++++++------- src/access/esmf_trace/postprocess.py | 172 ++++++-------------- 5 files changed, 387 insertions(+), 240 deletions(-) create mode 100644 src/access/esmf_trace/library.py diff --git a/src/access/esmf_trace/common_vars.py b/src/access/esmf_trace/common_vars.py index b99e818..73d8bf8 100644 --- a/src/access/esmf_trace/common_vars.py +++ b/src/access/esmf_trace/common_vars.py @@ -1 +1,35 @@ +from typing import Literal + + seconds_to_nanoseconds = 1e9 + +# For now, two config kinds: "run" and "post-summary" are included. +# This might be extended if we want to support more config kinds. +config_kind = Literal["run", "post-summary"] + +# Common keys for both run and post-summary configs +RUN_DEFAULT_FLAG_KEYS = [ + "merge_adjacent", + "xaxis_datetime", + "separate_plots", + "show_html", +] + +RUN_DEFAULT_KEYS = [ + "stream_prefix", + "model_component", + "max_depth", + "merge_gap_ns", + "cmap", + "renderer", + "max_workers", +] + +POST_SUMMARY_DEFAULT_KEYS = [ + "timeseries_suffix", + "save_json_path", + "stats_start_index", + "stats_end_index", + "pets", + "model_component", +] diff --git a/src/access/esmf_trace/config.py b/src/access/esmf_trace/config.py index 40f294a..ba8f29c 100644 --- a/src/access/esmf_trace/config.py +++ b/src/access/esmf_trace/config.py @@ -1,5 +1,9 @@ from dataclasses import dataclass from pathlib import Path +from typing import Literal, overload +from .tmp_yaml_parser import read_yaml +from .utils import extract_index_list_from_str, extract_pets +from .common_vars import config_kind class ConfigError(Exception): @@ -84,66 +88,185 @@ def to_job_kwargs( } -def _require_key(d: dict, keys: list[str]) -> str: +@dataclass(frozen=True) +class PostSummarySettings: + post_base_path: Path + model_component: list[str] | None = None + pets: list[int] | None = None + stats_start_index: int | None = None + stats_end_index: int | None = None + timeseries_suffix: str = "_timeseries.json" + save_json_path: Path | None = None + + +@dataclass(frozen=True) +class PostRunSettings: + name: str + output_index: list[str] | None = None + model_component: list[str] | None = None + pets: list[int] | None = None + stats_start_index: int | None = None + stats_end_index: int | None = None + save_json_path: Path | None = None + + +def _as_mapping(x, what: str) -> dict: + if not isinstance(x, dict): + raise ConfigError(f"{what} must be a mapping (dict)") + return x + + +def _as_list(x, what: str) -> list: + if not isinstance(x, list): + raise ConfigError(f"{what} must be a list") + return x + + +def _require_keys(d: dict, keys: list[str], where: str) -> None: missing = [k for k in keys if k not in d] if missing: - raise ConfigError(f"missing required config key(s): {', '.join(missing)}") - - -def _parse_defaults(d: dict) -> DefaultSettings: - return DefaultSettings( - post_base_path=d.get("post_base_path"), - stream_prefix=d.get("stream_prefix", "esmf_stream"), - model_component=d.get("model_component", "[ESMF]/[ensemble] RunPhase1/[ESM0001] RunPhase1"), - max_workers=d.get("max_workers"), - xaxis_datetime=bool(d.get("xaxis_datetime", False)), - separate_plots=bool(d.get("separate_plots", False)), - cmap=d.get("cmap", "tab10"), - renderer=d.get("renderer", "browser"), - show_html=bool(d.get("show_html", False)), - max_depth=int(d.get("max_depth", 6)), - merge_adjacent=bool(d.get("merge_adjacent", False)), - merge_gap_ns=int(d.get("merge_gap_ns", 1000)), - ) - - -def _parse_runs(lst: list[dict]) -> list[RunSettings]: - runs = [] - for item in lst: - if not isinstance(item, dict): - raise ConfigError("Each run must be a mapping (dict)") - - has_exact_path = item.get("exact_path") - has_other_parts = item.get("run_base") and item.get("run_name") and item.get("branch") - if not has_exact_path and not has_other_parts: - raise ConfigError( - "Each run must have either 'exact_path' or all of 'run_base', 'run_name', and 'branch' set" - ) + raise ConfigError(f"missing required config key(s) in {where}: {', '.join(missing)}") + + +def _norm_model_component(v: str | list | tuple | set | None) -> list[str] | None: + """ + Normalise model_component to a list of strings. + Accepts a comma-separated str or a list[str]. + """ + if v is None: + return None + + if isinstance(v, (list, tuple, set)): + parts = [str(x).strip() for x in v if str(x).strip()] + return parts or None + + s = str(v).strip() + if not s: + return None + + # split on commas + parts = [p.strip() for p in s.split(",") if p.strip()] + return parts or None + - runs.append( - RunSettings( - base_prefix=item.get("base_prefix"), - post_base_path=item.get("post_base_path"), - exact_path=Path(item["exact_path"]) if item.get("exact_path") else None, - run_base=Path(item["run_base"]) if item.get("run_base") else None, - run_name=item.get("run_name"), - branch=item.get("branch"), - pets=item.get("pets"), - model_component=item.get("model_component"), - output_index=item.get("output_index"), +def _norm_int_or_none(v: int | str | None) -> int | None: + if v is None or v == "": + return None + return int(v) + + +def _norm_path_or_none(v: str | Path | None) -> Path | None: + if v is None: + return None + return Path(v).expanduser() + + +# define overloads for type checking of load_yaml_config +@overload +def load_yaml_config(config_path: Path, kind: Literal["run"]) -> (DefaultSettings, list[RunSettings]): ... +@overload +def load_yaml_config( + config_path: Path, kind: Literal["post-summary"] +) -> (PostSummarySettings, list[PostRunSettings]): ... + + +def load_yaml_config(config_path: Path, kind: config_kind): + """ + Load and validate an esmf-trace yaml configuration file. + """ + config_path = Path(config_path) + data = read_yaml(config_path) + + _require_keys(data, ["default_settings", "runs"], where=str(config_path)) + default = _as_mapping(data["default_settings"], what="default_settings") + runs = _as_list(data["runs"], what="runs") + + if kind == "run": + defaults = DefaultSettings( + post_base_path=default.get("post_base_path"), + stream_prefix=default.get("stream_prefix", "esmf_stream"), + model_component=default.get("model_component", "[ESMF]/[ensemble] RunPhase1/[ESM0001] RunPhase1"), + max_workers=default.get("max_workers"), + xaxis_datetime=bool(default.get("xaxis_datetime", False)), + separate_plots=bool(default.get("separate_plots", False)), + cmap=default.get("cmap", "tab10"), + renderer=default.get("renderer", "browser"), + show_html=bool(default.get("show_html", False)), + max_depth=int(default.get("max_depth", 6)), + merge_adjacent=bool(default.get("merge_adjacent", False)), + merge_gap_ns=int(default.get("merge_gap_ns", 1000)), + ) + + run_settings: list[RunSettings] = [] + for i, item in enumerate(runs): + item = _as_mapping(item, what=f"runs[{i}]") + + has_exact_path = item.get("exact_path") + has_other_parts = item.get("run_base") and item.get("run_name") and item.get("branch") + if not has_exact_path and not has_other_parts: + raise ConfigError( + f"Each run must have either 'exact_path' or all of 'run_base', 'run_name', and 'branch' set (error in runs[{i}])" + ) + + run_settings.append( + RunSettings( + base_prefix=item.get("base_prefix"), + post_base_path=item.get("post_base_path"), + exact_path=_norm_path_or_none(item.get("exact_path") if item.get("exact_path") else None), + run_base=_norm_path_or_none(item.get("run_base") if item.get("run_base") else None), + run_name=item.get("run_name"), + branch=item.get("branch"), + archive=item.get("archive", "archive"), + pets=item.get("pets"), + model_component=item.get("model_component"), + output_index=item.get("output_index"), + ) ) + + return defaults, run_settings + + if kind == "post-summary": + post_base = default.get("post_base_path") + if not post_base: + raise ConfigError("default_settings.post_base_path is required for post-summary config") + + defaults = PostSummarySettings( + post_base_path=Path(post_base).expanduser(), + model_component=_norm_model_component(default.get("model_component")), + pets=extract_pets(default.get("pets") if default.get("pets") is not None else None), + stats_start_index=_norm_int_or_none(default.get("stats_start_index")), + stats_end_index=_norm_int_or_none(default.get("stats_end_index")), + timeseries_suffix=default.get("timeseries_suffix", "_timeseries.json"), + save_json_path=_norm_path_or_none(default.get("save_json_path")), ) - return runs + post_runs: list[PostRunSettings] = [] + for i, item in enumerate(runs): + item = _as_mapping(item, what=f"runs[{i}]") + _require_keys(item, ["name"], where=f"runs[{i}]") -def load_config(input_config: dict) -> (DefaultSettings, list[RunSettings]): - _require_key(input_config, ["default_settings", "runs"]) + oi = item.get("output_index") + if isinstance(oi, list): + output_index = [int(x) for x in oi] + elif isinstance(oi, str): + output_index = extract_index_list_from_str(oi) + else: + output_index = None - if not isinstance(input_config["default_settings"], dict): - raise ConfigError("'default_settings' must be a dict") - if not isinstance(input_config["runs"], list): - raise ConfigError("'runs' must be a list") + pets_input = item.get("pets", defaults.pets) + pets = pets_input if isinstance(pets_input, list) or pets_input is None else extract_pets(str(pets_input)) + + post_runs.append( + PostRunSettings( + name=str(item["name"]), + output_index=output_index, + model_component=_norm_model_component(item.get("model_component", defaults.model_component)), + pets=pets, + stats_start_index=_norm_int_or_none(item.get("stats_start_index", default.stats_start_index)), + stats_end_index=_norm_int_or_none(item.get("stats_end_index", default.stats_end_index)), + save_json_path=_norm_path_or_none(item.get("save_json_path", default.save_json_path)), + ) + ) + return defaults, post_runs - defaults = _parse_defaults(input_config["default_settings"]) - runs = _parse_runs(input_config["runs"]) - return defaults, runs + raise ValueError(f"Invalid config kind: {kind}") diff --git a/src/access/esmf_trace/library.py b/src/access/esmf_trace/library.py new file mode 100644 index 0000000..767030f --- /dev/null +++ b/src/access/esmf_trace/library.py @@ -0,0 +1,55 @@ +from dataclasses import replace +from pathlib import Path + +from .batch_runs import run_batch_jobs +from .config import DefaultSettings, PostSummarySettings, load_yaml_config, RunSettings, PostRunSettings +from .postprocess import post_summary_from_yaml + + +def run_from_config( + config_path: str | Path | dict, + run_overrides: dict | None = None, +): + """ + Either a yaml path or a dict with the same structure. + + run_overrides: optional dict of DefaultSettings field overrides + e.g. {"stream_prefix": "esmf_stream", "max_workers": 8} + """ + + if isinstance(config_path, (str, Path)): + defaults, runs = load_yaml_config(Path(config_path), kind="run") + else: + defaults = DefaultSettings(**config_path["default_settings"]) + runs = [RunSettings(**r) for r in config_path["runs"]] + + if run_overrides: + defaults = replace(defaults, **dict(run_overrides)) + + run_batch_jobs(defaults, runs) + + +def post_summary_from_config( + config_path: str | Path | dict, + post_overrides: dict | None = None, + save_json_path: str | Path | None = None, +): + """ + Either a yaml path or a dict with the same structure. + + post_overrides: optional dict of PostSummarySettings field overrides + e.g. {"timeseries_suffix": "_timeseries.json", "stats_start_index": 1} + """ + + if isinstance(config_path, (str, Path)): + defaults, runs = load_yaml_config(Path(config_path), kind="post-summary") + assert isinstance(defaults, PostSummarySettings) + else: + defaults = PostSummarySettings(**config_path["default_settings"]) + runs = [PostRunSettings(**r) for r in config_path["runs"]] + + if post_overrides: + defaults = replace(defaults, **dict(post_overrides)) + + out_path = str(save_json_path) if save_json_path is not None else None + post_summary_from_yaml(defaults, runs, save_json_path=out_path) diff --git a/src/access/esmf_trace/main.py b/src/access/esmf_trace/main.py index 243fc5f..31bab14 100644 --- a/src/access/esmf_trace/main.py +++ b/src/access/esmf_trace/main.py @@ -1,18 +1,15 @@ import argparse -from dataclasses import replace from pathlib import Path -from .batch_runs import run_batch_jobs -from .config import DefaultSettings, load_config -from .postprocess import run_post_summary_from_yaml -from .tmp_yaml_parser import read_yaml +from .common_vars import RUN_DEFAULT_FLAG_KEYS, RUN_DEFAULT_KEYS, POST_SUMMARY_DEFAULT_KEYS +from .library import run_from_config, post_summary_from_config -def _override_run_args(ns: argparse.Namespace) -> None: +def _add_run_overrides(parser: argparse.ArgumentParser) -> None: """ Optional overrides from command line args to config settings. """ - arg = ns.add_argument_group("overrides", "Optional overrides to config settings") + arg = parser.add_argument_group("overrides", "Optional overrides to config settings") arg.add_argument( "--stream-prefix", @@ -74,40 +71,56 @@ def _override_run_args(ns: argparse.Namespace) -> None: ) -def _apply_overrides(ns: argparse.Namespace, defaults: DefaultSettings) -> DefaultSettings: +def _apply_run_overrides(ns: argparse.Namespace) -> dict: """ Apply any command line overrides to the run defaults. """ - updates = {} + overrides = {} # booleans only override when True provided - if getattr(ns, "merge_adjacent", False): - updates["merge_adjacent"] = True - if getattr(ns, "xaxis_datetime", False): - updates["xaxis_datetime"] = True - if getattr(ns, "separate_plots", False): - updates["separate_plots"] = True - if getattr(ns, "show_html", False): - updates["show_html"] = True + for flag in RUN_DEFAULT_FLAG_KEYS: + if getattr(ns, flag, False): + overrides[flag] = True # None means no override - for f in [ - "stream_prefix", - "model_component", - "max_depth", - "merge_gap_ns", - "cmap", - "renderer", - "max_workers", - ]: + for f in RUN_DEFAULT_KEYS: v = getattr(ns, f, None) if v is not None: - updates[f] = v + overrides[f] = v - return replace(defaults, **updates) if updates else defaults + return overrides -def _add_run_from_yaml_subparser(subparsers) -> None: +def _add_post_summary_overrides(parser: argparse.ArgumentParser) -> None: + """ + Add optional override arguments for the post-summary-from-yaml command. + """ + arg = parser.add_argument_group("overrides", "Optional overrides to config settings") + + arg.add_argument("--model-component", nargs="+", help="Full model_component name(s) to include.") + arg.add_argument("--pets", nargs="+", type=int, help="PET index(es) to include.") + arg.add_argument("--stats-start-index", type=int, help="Slice start (iloc) per series.") + arg.add_argument("--stats-end-index", type=int, help="Slice end (iloc, exclusive) per series.") + arg.add_argument( + "--timeseries-suffix", type=str, help="Timeseries filename suffix to match (e.g., _timeseries.json)." + ) + arg.add_argument("--save-json-path", type=Path, help="Save combined summary JSON to this path.") + + +def _apply_post_summary_overrides(ns: argparse.Namespace) -> dict: + overrides = {} + + for f in POST_SUMMARY_DEFAULT_KEYS: + v = getattr(ns, f, None) + if v is not None: + if f == "save_json_path" and isinstance(v, Path): + v = str(v) + overrides[f] = v + + return overrides + + +def _add_run_command(subparsers) -> None: """ run-from-yaml: Process multiple traceout directories from a yaml config file @@ -125,12 +138,12 @@ def _add_run_from_yaml_subparser(subparsers) -> None: ) # Optional overrides - _override_run_args(rs) + _add_run_overrides(rs) - rs.set_defaults(func=run_from_yaml_config) + rs.set_defaults(func=cli_run_from_yaml) -def _add_post_summary_from_yaml_subparser(subparsers) -> None: +def _add_post_summary_command(subparsers) -> None: """ post-summary-from-yaml: Summarise existing *_timeseries.json files by reading a YAML file that lists: @@ -150,52 +163,52 @@ def _add_post_summary_from_yaml_subparser(subparsers) -> None: help="yaml config file for postprocessing summary", ) - arg = ps.add_argument_group("overrides", "Optional overrides to config settings") - - # Optional override - arg.add_argument("--model-component", nargs="+", help="Full model_component name(s) to include.") - arg.add_argument("--pets", nargs="+", type=int, help="PET index(es) to include.") - arg.add_argument("--stats-start-index", type=int, help="Slice start (iloc) per series.") - arg.add_argument( - "--stats-end-index", type=int, help="Slice end (iloc, exclusive) per series. Default: full length." - ) - arg.add_argument( - "--timeseries-suffix", - type=str, - default="_timeseries.json", - help="Timeseries filename suffix to match (default: _timeseries.json).", - ) - arg.add_argument( - "--save-json-path", type=Path, help="Save summary to json format file (otherwise prints to stdout)." - ) + # Optional overrides + _add_post_summary_overrides(ps) - ps.set_defaults(func=run_post_summary_from_yaml) + ps.set_defaults(func=cli_post_summary_from_yaml) -def run_from_yaml_config( +def cli_run_from_yaml( ns: argparse.Namespace, ) -> None: """ Run multiple jobs from a yaml config file with optional command line overrides. """ - input_config = read_yaml(ns.config) - defaults, runs = load_config(input_config) - # overides - defaults = _apply_overrides(ns, defaults) - run_batch_jobs(defaults, runs) + run_from_config(ns.config, run_overrides=_apply_run_overrides(ns)) -def main(): +def cli_post_summary_from_yaml( + ns: argparse.Namespace, +) -> None: + """ + Summarise existing e.g. *_timeseries.json files by reading a yaml file that lists: + - post_base_path + - cases: [{ name: postprocessing_, output_index: [optional list of ints] }, ...] + """ + post_summary_from_config( + ns.config, + post_overrides=_apply_post_summary_overrides(ns), + save_json_path=ns.save_json_path, + ) + + +def build_parser() -> argparse.ArgumentParser: + """ + Build and return the CLI argument parser. + """ parser = argparse.ArgumentParser( prog="esmf-trace", description="ESMF traceout analysis and visualisation.", ) - subparsers = parser.add_subparsers(dest="cmd", required=True) + _add_run_command(subparsers) + _add_post_summary_command(subparsers) + return parser - _add_run_from_yaml_subparser(subparsers) - _add_post_summary_from_yaml_subparser(subparsers) +def main(): + parser = build_parser() args = parser.parse_args() args.func(args) diff --git a/src/access/esmf_trace/postprocess.py b/src/access/esmf_trace/postprocess.py index 790554d..f906ce0 100644 --- a/src/access/esmf_trace/postprocess.py +++ b/src/access/esmf_trace/postprocess.py @@ -1,11 +1,12 @@ import argparse +from dataclasses import replace import json from pathlib import Path import pandas as pd -from .tmp_yaml_parser import read_yaml -from .utils import extract_pets, output_dir_to_index, output_name_to_index +from .config import PostSummarySettings, PostRunSettings +from .utils import output_dir_to_index, output_name_to_index def _load_timeseries_json(p: Path) -> pd.DataFrame: @@ -85,106 +86,6 @@ def _collect_case_jsons( return jsons -def _as_list_or_none(v) -> list | None: - if v is None: - return None - if isinstance(v, (list, tuple, set)): - return list(v) - return [v] - - -def _norm_model_component(v) -> list[str] | None: - """ - Normalise model_component to a list of strings. - Accepts a comma-separated str or a list[str]. - """ - if v is None: - return None - if isinstance(v, (list, tuple, set)): - parts = [str(x).strip() for x in v if str(x).strip()] - return parts or None - s = str(v).strip() - if not s: - return None - # split on commas - parts = [p.strip() for p in s.split(",") if p.strip()] - return parts or None - - -def _norm_pets(v) -> list[int] | None: - if v is None: - return None - if isinstance(v, str): - return _as_list_or_none(extract_pets(v)) - if isinstance(v, (list, tuple, set)): - return [int(x) for x in v] - return [int(v)] - - -def _norm_end(v): - if v is None or v == "": - return None - return int(v) - - -def load_post_runs_config(config_path: Path) -> tuple[dict, list[dict]]: - """ - Parse 'postprocessing.yaml' with: - default_settings: - { post_base_path, model_component?, pets?, stats_start_index?, stats_end_index?, timeseries_suffix? } - runs: [ { name, output_index?, model_component?, pets?, stats_start_index?, stats_end_index? }, ... ] - """ - data = read_yaml(config_path) - - if "default_settings" not in data or "runs" not in data: - raise ValueError("YAML must have 'default_settings' and 'runs' keys.") - - dflt = data["default_settings"] - runs = data["runs"] - - if not isinstance(dflt, dict): - raise ValueError("'default_settings' must be a mapping.") - if not isinstance(runs, list) or not runs: - raise ValueError("'runs' must be a non-empty list.") - - post_base_path = dflt.get("post_base_path") - if not post_base_path: - raise ValueError("'default_settings.post_base_path' is required.") - - defaults = { - "post_base_path": Path(post_base_path).expanduser().resolve(), - "model_component": _norm_model_component(dflt.get("model_component")), - "pets": _norm_pets(dflt.get("pets")), - "stats_start_index": ( - int(dflt.get("stats_start_index")) if dflt.get("stats_start_index") is not None else None - ), - "stats_end_index": _norm_end(dflt.get("stats_end_index")), - "timeseries_suffix": dflt.get("timeseries_suffix", "_timeseries.json"), - "save_json_path": (Path(dflt["save_json_path"]).expanduser() if dflt.get("save_json_path") else None), - } - - norm_runs: list[dict] = [] - for r in runs: - norm_runs.append( - { - "name": str(r["name"]), - "output_index": ([int(x) for x in r["output_index"]] if r.get("output_index") is not None else None), - "model_component": _norm_model_component(r.get("model_component", defaults["model_component"])), - "pets": _norm_pets(r.get("pets", defaults["pets"])), - "stats_start_index": ( - int(r.get("stats_start_index")) - if r.get("stats_start_index") is not None - else defaults["stats_start_index"] - ), - "stats_end_index": _norm_end( - r.get("stats_end_index") if r.get("stats_end_index") is not None else defaults["stats_end_index"] - ), - "save_json_path": (Path(r["save_json_path"]).expanduser() if r.get("save_json_path") else None), - } - ) - return defaults, norm_runs - - def _summarise_case( json_paths: list[Path], model_component: list[str] | None, @@ -295,8 +196,7 @@ def _summarise_case( combined_by_comp["__case_name"] + "/combine/" + combined_by_comp["model_component"].astype(str).str.strip() ) - out = pd.concat([per_output[output_cols], combined_by_comp[output_cols]], ignore_index=True) - return out + return pd.concat([per_output[output_cols], combined_by_comp[output_cols]], ignore_index=True) def _resolve_save_json_path(save_json_path: str | None) -> Path | None: @@ -309,42 +209,39 @@ def _resolve_save_json_path(save_json_path: str | None) -> Path | None: return p -def run_post_summary_from_yaml(ns: argparse.Namespace) -> None: - """ - Build per-output rows + a combined row for each selected case (from yaml), - then print a combined table. - """ - defaults, runs = load_post_runs_config(Path(ns.config)) - - post_base_path: Path = defaults["post_base_path"] - timeseries_suffix: str = defaults["timeseries_suffix"] +def post_summary_from_yaml( + defaults: PostSummarySettings, + runs: list[PostRunSettings], + save_json_path: str | None = None, +) -> pd.DataFrame: + post_base_path: Path = defaults.post_base_path + timeseries_suffix: str = defaults.timeseries_suffix per_case_tables: list[pd.DataFrame] = [] - # process each run (case) for r in runs: - case_name = r["name"] + case_name = r.name jsons = _collect_case_jsons( post_base_path=post_base_path, case_name=case_name, - output_index=r["output_index"], + output_index=r.output_index, timeseries_suffix=timeseries_suffix, ) case_summary = _summarise_case( json_paths=jsons, - model_component=r["model_component"], - pets=r["pets"], - stats_start_index=r["stats_start_index"], - stats_end_index=r["stats_end_index"], + model_component=r.model_component, + pets=r.pets, + stats_start_index=r.stats_start_index, + stats_end_index=r.stats_end_index, ) if case_summary.empty: continue # Save per-run json if this run specified a save path (strict .json) - per_run_save = _resolve_save_json_path(r.get("save_json_path")) + per_run_save = _resolve_save_json_path(r.save_json_path) if per_run_save is not None: ( case_summary.reset_index(drop=True).to_json( # ensure a clean row index @@ -370,10 +267,7 @@ def run_post_summary_from_yaml(ns: argparse.Namespace) -> None: print("-- Summary table:") print(clean_df) - # save combined json if requested: cli override, else defaults - cli_combined = getattr(ns, "save_json_path", None) - default_combined = defaults.get("save_json_path") - combined_out = _resolve_save_json_path(cli_combined or default_combined) + combined_out = _resolve_save_json_path(save_json_path or defaults.save_json_path) if combined_out is not None: (combined_df.rename(columns={"__row_label": "name"}).to_json(combined_out, orient="records", indent=2)) @@ -383,3 +277,31 @@ def run_post_summary_from_yaml(ns: argparse.Namespace) -> None: clean_parquet = combined_out.with_name(combined_out.stem + "_table.parquet") clean_df.to_parquet(clean_parquet, index=True) print(f"-- saved cleaned table parquet: {clean_parquet}") + + +# def run_post_summary_from_yaml(ns: argparse.Namespace) -> None: +# """ +# cli entrypoint for post-summary config from yaml and run the summary. +# """ +# defaults, runs = load_yaml_config(Path(ns.config), kind="post-summary") +# assert isinstance(defaults, PostSummarySettings) + +# # apply overrides to defaults (if any) +# overrides = {} +# if getattr(ns, "model_component", None) is not None: +# overrides["model_component"] = ns.model_component # list[str] +# if getattr(ns, "pets", None) is not None: +# overrides["pets"] = ns.pets # list[int] +# if getattr(ns, "stats_start_index", None) is not None: +# overrides["stats_start_index"] = ns.stats_start_index # int +# if getattr(ns, "stats_end_index", None) is not None: +# overrides["stats_end_index"] = ns.stats_end_index # int +# if getattr(ns, "timeseries_suffix", None) is not None: +# overrides["timeseries_suffix"] = ns.timeseries_suffix # str +# if getattr(ns, "save_json_path", None) is not None: +# overrides["save_json_path"] = ns.save_json_path # str + +# if overrides: +# defaults = replace(defaults, **overrides) + +# post_summary_from_yaml(defaults, runs, save_json_path=ns.save_json_path) From a4206ceff5afce02e64078a50a50919061c69937 Mon Sep 17 00:00:00 2001 From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:50:19 +1100 Subject: [PATCH 6/8] Export run_from_config and post_summary_from_config at top level --- src/access/esmf_trace/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/access/esmf_trace/__init__.py b/src/access/esmf_trace/__init__.py index 704f5ca..8c1eef1 100644 --- a/src/access/esmf_trace/__init__.py +++ b/src/access/esmf_trace/__init__.py @@ -7,3 +7,13 @@ with suppress(PackageNotFoundError): __version__ = version("esmf_trace") + +from access.esmf_trace.library import ( + run_from_config, + post_summary_from_config, +) + +__all__ = [ + "run_from_config", + "post_summary_from_config", +] From 5bcc5f14aaecbf8da67f822c572a6e8546ca2f98 Mon Sep 17 00:00:00 2001 From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com> Date: Wed, 11 Feb 2026 13:58:57 +1100 Subject: [PATCH 7/8] ruff check --fix --- src/access/esmf_trace/__init__.py | 2 +- src/access/esmf_trace/common_vars.py | 1 - src/access/esmf_trace/config.py | 6 ++++-- src/access/esmf_trace/library.py | 2 +- src/access/esmf_trace/main.py | 4 ++-- src/access/esmf_trace/postprocess.py | 4 +--- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/access/esmf_trace/__init__.py b/src/access/esmf_trace/__init__.py index 8c1eef1..d1b4846 100644 --- a/src/access/esmf_trace/__init__.py +++ b/src/access/esmf_trace/__init__.py @@ -9,8 +9,8 @@ __version__ = version("esmf_trace") from access.esmf_trace.library import ( - run_from_config, post_summary_from_config, + run_from_config, ) __all__ = [ diff --git a/src/access/esmf_trace/common_vars.py b/src/access/esmf_trace/common_vars.py index 73d8bf8..283ae74 100644 --- a/src/access/esmf_trace/common_vars.py +++ b/src/access/esmf_trace/common_vars.py @@ -1,6 +1,5 @@ from typing import Literal - seconds_to_nanoseconds = 1e9 # For now, two config kinds: "run" and "post-summary" are included. diff --git a/src/access/esmf_trace/config.py b/src/access/esmf_trace/config.py index ba8f29c..6bc9003 100644 --- a/src/access/esmf_trace/config.py +++ b/src/access/esmf_trace/config.py @@ -1,9 +1,10 @@ from dataclasses import dataclass from pathlib import Path from typing import Literal, overload + +from .common_vars import config_kind from .tmp_yaml_parser import read_yaml from .utils import extract_index_list_from_str, extract_pets -from .common_vars import config_kind class ConfigError(Exception): @@ -205,7 +206,8 @@ def load_yaml_config(config_path: Path, kind: config_kind): has_other_parts = item.get("run_base") and item.get("run_name") and item.get("branch") if not has_exact_path and not has_other_parts: raise ConfigError( - f"Each run must have either 'exact_path' or all of 'run_base', 'run_name', and 'branch' set (error in runs[{i}])" + "Each run must have either 'exact_path' or " + f"all of 'run_base', 'run_name', and 'branch' set (error in runs[{i}])" ) run_settings.append( diff --git a/src/access/esmf_trace/library.py b/src/access/esmf_trace/library.py index 767030f..050fd1d 100644 --- a/src/access/esmf_trace/library.py +++ b/src/access/esmf_trace/library.py @@ -2,7 +2,7 @@ from pathlib import Path from .batch_runs import run_batch_jobs -from .config import DefaultSettings, PostSummarySettings, load_yaml_config, RunSettings, PostRunSettings +from .config import DefaultSettings, PostRunSettings, PostSummarySettings, RunSettings, load_yaml_config from .postprocess import post_summary_from_yaml diff --git a/src/access/esmf_trace/main.py b/src/access/esmf_trace/main.py index 31bab14..5963682 100644 --- a/src/access/esmf_trace/main.py +++ b/src/access/esmf_trace/main.py @@ -1,8 +1,8 @@ import argparse from pathlib import Path -from .common_vars import RUN_DEFAULT_FLAG_KEYS, RUN_DEFAULT_KEYS, POST_SUMMARY_DEFAULT_KEYS -from .library import run_from_config, post_summary_from_config +from .common_vars import POST_SUMMARY_DEFAULT_KEYS, RUN_DEFAULT_FLAG_KEYS, RUN_DEFAULT_KEYS +from .library import post_summary_from_config, run_from_config def _add_run_overrides(parser: argparse.ArgumentParser) -> None: diff --git a/src/access/esmf_trace/postprocess.py b/src/access/esmf_trace/postprocess.py index f906ce0..713f8c6 100644 --- a/src/access/esmf_trace/postprocess.py +++ b/src/access/esmf_trace/postprocess.py @@ -1,11 +1,9 @@ -import argparse -from dataclasses import replace import json from pathlib import Path import pandas as pd -from .config import PostSummarySettings, PostRunSettings +from .config import PostRunSettings, PostSummarySettings from .utils import output_dir_to_index, output_name_to_index From ce17fcb989fc6c6d0548c08498fc0880f7dc3ccb Mon Sep 17 00:00:00 2001 From: minghangli-uni <24727729+minghangli-uni@users.noreply.github.com> Date: Wed, 11 Feb 2026 16:57:25 +1100 Subject: [PATCH 8/8] Remove unused lines in postprocess.py --- src/access/esmf_trace/postprocess.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/src/access/esmf_trace/postprocess.py b/src/access/esmf_trace/postprocess.py index 713f8c6..bbc9ec9 100644 --- a/src/access/esmf_trace/postprocess.py +++ b/src/access/esmf_trace/postprocess.py @@ -275,31 +275,3 @@ def post_summary_from_yaml( clean_parquet = combined_out.with_name(combined_out.stem + "_table.parquet") clean_df.to_parquet(clean_parquet, index=True) print(f"-- saved cleaned table parquet: {clean_parquet}") - - -# def run_post_summary_from_yaml(ns: argparse.Namespace) -> None: -# """ -# cli entrypoint for post-summary config from yaml and run the summary. -# """ -# defaults, runs = load_yaml_config(Path(ns.config), kind="post-summary") -# assert isinstance(defaults, PostSummarySettings) - -# # apply overrides to defaults (if any) -# overrides = {} -# if getattr(ns, "model_component", None) is not None: -# overrides["model_component"] = ns.model_component # list[str] -# if getattr(ns, "pets", None) is not None: -# overrides["pets"] = ns.pets # list[int] -# if getattr(ns, "stats_start_index", None) is not None: -# overrides["stats_start_index"] = ns.stats_start_index # int -# if getattr(ns, "stats_end_index", None) is not None: -# overrides["stats_end_index"] = ns.stats_end_index # int -# if getattr(ns, "timeseries_suffix", None) is not None: -# overrides["timeseries_suffix"] = ns.timeseries_suffix # str -# if getattr(ns, "save_json_path", None) is not None: -# overrides["save_json_path"] = ns.save_json_path # str - -# if overrides: -# defaults = replace(defaults, **overrides) - -# post_summary_from_yaml(defaults, runs, save_json_path=ns.save_json_path)