diff --git a/src/access/esmf_trace/config.py b/src/access/esmf_trace/config.py index 9b940c8..c6e6a04 100644 --- a/src/access/esmf_trace/config.py +++ b/src/access/esmf_trace/config.py @@ -103,7 +103,7 @@ class PostSummarySettings: @dataclass(frozen=True) class PostRunSettings: name: str - output_index: list[str] | None = None + output_index: list[int] | None = None model_component: list[str] | None = None pets: list[int] | None = None stats_start_index: int | None = None @@ -266,9 +266,9 @@ def load_yaml_config(config_path: Path, kind: config_kind): output_index=output_index, model_component=_norm_model_component(item.get("model_component", defaults.model_component)), pets=pets, - stats_start_index=_norm_int_or_none(item.get("stats_start_index", default.stats_start_index)), - stats_end_index=_norm_int_or_none(item.get("stats_end_index", default.stats_end_index)), - save_json_path=_norm_path_or_none(item.get("save_json_path", default.save_json_path)), + stats_start_index=_norm_int_or_none(item.get("stats_start_index", defaults.stats_start_index)), + stats_end_index=_norm_int_or_none(item.get("stats_end_index", defaults.stats_end_index)), + save_json_path=_norm_path_or_none(item.get("save_json_path", defaults.save_json_path)), ) ) return defaults, post_runs diff --git a/src/access/esmf_trace/library.py b/src/access/esmf_trace/library.py index d59178c..5ea860c 100644 --- a/src/access/esmf_trace/library.py +++ b/src/access/esmf_trace/library.py @@ -48,7 +48,21 @@ def post_summary_from_config( assert isinstance(defaults, PostSummarySettings) else: defaults = PostSummarySettings(**config_path["default_settings"]) - runs = [PostRunSettings(**r) for r in config_path["runs"]] + + merged_runs: list[PostRunSettings] = [] + for r in config_path["runs"]: + rr = dict(r) + + # inherit defaults if not explicitly set per-run + rr.setdefault("model_component", defaults.model_component) + rr.setdefault("pets", defaults.pets) + rr.setdefault("stats_start_index", defaults.stats_start_index) + rr.setdefault("stats_end_index", defaults.stats_end_index) + # do not inherit default combined save_json_path into per-run save_json_path + rr.setdefault("save_json_path", None) + + merged_runs.append(PostRunSettings(**rr)) + runs = merged_runs if post_overrides: defaults = replace(defaults, **dict(post_overrides)) diff --git a/src/access/esmf_trace/postprocess.py b/src/access/esmf_trace/postprocess.py index c783002..34ac990 100644 --- a/src/access/esmf_trace/postprocess.py +++ b/src/access/esmf_trace/postprocess.py @@ -42,7 +42,7 @@ def _slice_per_series_iloc( """ Slice rows per (group) using iloc[start:end] in each group after sorting by order_cols If both start and end are None -> no slicing (full series). - If end is None -> no slicing (full series). + If end is None -> slice from start to end of series. """ if start is None and end is None: return df @@ -142,7 +142,7 @@ def _summarise_case( tmin=("duration_s", "min"), tmax=("duration_s", "max"), tavg=("duration_s", "mean"), - tmedian=("duration_s", lambda x: x.quantile(0.50)), + tmedian=("duration_s", "median"), tstd=("duration_s", "std"), ).reset_index() @@ -174,21 +174,19 @@ def _summarise_case( ) per_output = per_output.sort_values(["__case_name", "__output_index", "model_component"], kind="mergesort") - combined_by_comp = ( - per_output.groupby(["__case_name", "model_component"], sort=False, dropna=False) - .agg( - hits=("hits", "mean"), - tmin=("tmin", "min"), - tmax=("tmax", "max"), - tavg=("tavg", "mean"), - tmedian=("tmedian", "mean"), - tstd=("tstd", "mean"), - pemin=("pemin", "min"), - pemax=("pemax", "max"), - ncpus=("ncpus", "mean"), - ) - .reset_index() - ) + grp_comp = ts.groupby(["__case_name", "model_component"], sort=False, dropna=False) + combined_by_comp = grp_comp.agg( + hits=("duration_s", "count"), + tmin=("duration_s", "min"), + tmax=("duration_s", "max"), + tavg=("duration_s", "mean"), + tmedian=("duration_s", "median"), + tstd=("duration_s", "std"), + ncpus=("pet", "nunique"), + pemin=("pet", "min"), + pemax=("pet", "max"), + ).reset_index() + combined_by_comp["__output_name"] = "combine" combined_by_comp["__row_label"] = ( combined_by_comp["__case_name"] + "/combine/" + combined_by_comp["model_component"].astype(str).str.strip() @@ -197,7 +195,7 @@ def _summarise_case( return pd.concat([per_output[output_cols], combined_by_comp[output_cols]], ignore_index=True) -def _resolve_save_json_path(save_json_path: str | None) -> Path | None: +def _resolve_save_json_path(save_json_path: str | Path | None) -> Path | None: if save_json_path is None: return None p = Path(save_json_path).expanduser() @@ -211,6 +209,8 @@ def post_summary_from_yaml( defaults: PostSummarySettings, runs: list[PostRunSettings], save_json_path: str | None = None, + include_combined: bool = True, + include_per_output: bool = True, ) -> pd.DataFrame: post_base_path: Path = Path(defaults.post_base_path) timeseries_suffix: str = defaults.timeseries_suffix @@ -239,7 +239,7 @@ def post_summary_from_yaml( continue # Save per-run json if this run specified a save path (strict .json) - per_run_save = _resolve_save_json_path(r.save_json_path) + per_run_save = _resolve_save_json_path(r.save_json_path) if r.save_json_path is not None else None if per_run_save is not None: ( case_summary.reset_index(drop=True).to_json( # ensure a clean row index @@ -256,6 +256,12 @@ def post_summary_from_yaml( # Build combined table across all selected runs combined_df = pd.concat(per_case_tables, ignore_index=True) + if not include_combined: + combined_df = combined_df[combined_df["__output_name"] != "combine"] + + if not include_per_output: + combined_df = combined_df[combined_df["__output_name"] == "combine"] + wanted_cols = ["__row_label", "hits", "tmin", "tmax", "tavg", "tmedian", "tstd", "pemin", "pemax"] combined_df = combined_df.loc[:, [c for c in wanted_cols if c in combined_df.columns]]