PMP enso #273

Draft: wants to merge 41 commits into main

Commits (41)
414a83b
initial commit for enso codes
lee1043 May 5, 2025
b5d8751
rename internal function and generalize variable name
lee1043 May 5, 2025
3973278
apply changes from #271
lee1043 May 5, 2025
92ede94
update
lee1043 May 5, 2025
7c5730f
update
lee1043 May 5, 2025
06e50ff
pre-commit fix
lee1043 May 5, 2025
7719d88
pre-commit fix
lee1043 May 5, 2025
f08c655
in progress
lee1043 May 6, 2025
8ecd3f7
update
lee1043 May 6, 2025
afa9c49
Update packages/climate-ref-pmp/src/climate_ref_pmp/diagnostics/enso.py
lee1043 May 7, 2025
76c7b97
update
lee1043 May 7, 2025
b2aeb1c
update
lee1043 May 7, 2025
81e60c4
in progress
lee1043 May 8, 2025
dbcdade
in progress
lee1043 May 8, 2025
d6b7491
update
lee1043 May 8, 2025
75501a6
add change log
lee1043 May 8, 2025
4b689d4
Update environment.yml
lee1043 May 8, 2025
0098813
update
lee1043 May 8, 2025
beae40e
update
lee1043 May 8, 2025
78d8d3b
Merge remote-tracking branch 'origin/main' into 223_pmp-enso-2
lewisjared May 9, 2025
d11b581
feat: Rework so that the command is executed
lewisjared May 9, 2025
c4dd856
clean up
lee1043 May 9, 2025
a49b336
update
lee1043 May 9, 2025
5b6885b
ruff fix
lee1043 May 9, 2025
7511d09
remove enso param file as enso driver does not need it for the curren…
lee1043 May 9, 2025
cd116e9
update
lee1043 May 9, 2025
5e21ae7
generate landmask for reference per variable basis because it is poss…
lee1043 May 9, 2025
f19e52d
typo fix
lee1043 May 9, 2025
b78f6b3
update
lee1043 May 11, 2025
c986f81
update
lee1043 May 11, 2025
aec1b48
update
lee1043 May 11, 2025
477a4ae
add logger lib to the pmp env
lee1043 May 11, 2025
f606fc6
update
lee1043 May 11, 2025
136edb1
update -- bug fix
lee1043 May 12, 2025
665ba8d
update -- typo fix
lee1043 May 12, 2025
6c6a72c
update
lee1043 May 12, 2025
b488f32
adjust numpy version limit
lee1043 May 12, 2025
b9220c7
chore: Update lockfile
lewisjared May 12, 2025
9166fbc
bug fix
lee1043 May 12, 2025
c212593
update
lee1043 May 12, 2025
14fc030
typo fix
lee1043 May 12, 2025
1 change: 1 addition & 0 deletions changelog/273.feature.md
@@ -0,0 +1 @@
Implemented PMP ENSO metrics
12 changes: 10 additions & 2 deletions packages/climate-ref-pmp/src/climate_ref_pmp/__init__.py
@@ -6,22 +6,30 @@

from climate_ref_core.dataset_registry import dataset_registry_manager
from climate_ref_core.providers import CondaDiagnosticProvider
from climate_ref_pmp.diagnostics import AnnualCycle, ExtratropicalModesOfVariability
from climate_ref_pmp.diagnostics import ENSO, AnnualCycle, ExtratropicalModesOfVariability

__version__ = importlib.metadata.version("climate-ref-pmp")

# Create the PMP diagnostics provider
# PMP uses a conda environment to run the diagnostics
provider = CondaDiagnosticProvider("PMP", __version__)

# Annual cycle diagnostics and metrics
provider.register(AnnualCycle())

# ENSO diagnostics and metrics
provider.register(ENSO("ENSO_perf")) # Assigned to ESMValTool
provider.register(ENSO("ENSO_tel"))
provider.register(ENSO("ENSO_proc"))

# Extratropical modes of variability diagnostics and metrics
provider.register(ExtratropicalModesOfVariability("PDO"))
provider.register(ExtratropicalModesOfVariability("NPGO"))
provider.register(ExtratropicalModesOfVariability("NAO"))
provider.register(ExtratropicalModesOfVariability("NAM"))
provider.register(ExtratropicalModesOfVariability("PNA"))
provider.register(ExtratropicalModesOfVariability("NPO"))
provider.register(ExtratropicalModesOfVariability("SAM"))
provider.register(AnnualCycle())


dataset_registry_manager.register(
packages/climate-ref-pmp/src/climate_ref_pmp/diagnostics/__init__.py
@@ -1,9 +1,11 @@
"""PMP diagnostics."""

from climate_ref_pmp.diagnostics.annual_cycle import AnnualCycle
from climate_ref_pmp.diagnostics.enso import ENSO
from climate_ref_pmp.diagnostics.variability_modes import ExtratropicalModesOfVariability

__all__ = [
"ENSO",
"AnnualCycle",
"ExtratropicalModesOfVariability",
]
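
A minimal standalone sketch (not part of the diff) of how the three registered collections become distinct diagnostics; the name and slug follow ENSO.__init__ in the new module below:

from climate_ref_pmp.diagnostics import ENSO

# Each metrics collection yields its own diagnostic instance; the slug is the
# lowercased collection name (e.g. "enso_perf").
for collection in ("ENSO_perf", "ENSO_tel", "ENSO_proc"):
    diag = ENSO(collection)
    print(diag.name, diag.slug)
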
216 changes: 216 additions & 0 deletions packages/climate-ref-pmp/src/climate_ref_pmp/diagnostics/enso.py
@@ -0,0 +1,216 @@
import json
import os
from collections.abc import Iterable
from typing import Any

from loguru import logger

from climate_ref_core.datasets import DatasetCollection, FacetFilter, SourceDatasetType
from climate_ref_core.diagnostics import (
CommandLineDiagnostic,
DataRequirement,
ExecutionDefinition,
ExecutionResult,
)
from climate_ref_pmp.pmp_driver import _get_resource, process_json_result


class ENSO(CommandLineDiagnostic):
"""
Calculate the ENSO performance metrics for a dataset
"""

facets = ()

def __init__(self, metrics_collection: str) -> None:
self.name = metrics_collection
self.slug = metrics_collection.lower()
self.metrics_collection = metrics_collection
self.parameter_file = "pmp_param_enso.py"

# TO DO: sftlf and areacell
# TO DO: Get the path to the files per variable
def _get_data_requirements(
metrics_collection: str,
extra_experiments: str | tuple[str, ...] | list[str] = (),
) -> tuple[DataRequirement, DataRequirement]:
if metrics_collection == "ENSO_perf":
model_variables: tuple[str, ...] = ("pr", "ts", "tauu")
obs_sources: tuple[str, ...] = ("GPCP-2-3", "ERA-INT")
elif metrics_collection == "ENSO_tel":
model_variables = ("pr", "ts")
obs_sources = ("GPCP-2-3", "ERA-INT")
elif metrics_collection == "ENSO_proc":
model_variables = ("ts", "taux", "hfls", "hfss", "rlds", "rlus", "rsds", "rsus")
obs_sources = ("GPCP-2-3", "ERA-INT", "TropFlux")
else:
raise ValueError(
f"Unknown metrics collection: {metrics_collection}. "
"Valid options are: ENSO_perf, ENSO_tel, ENSO_proc"
)

obs_variables = model_variables

filters = [
FacetFilter(
facets={
"frequency": "mon",
"experiment_id": ("historical", *extra_experiments),
"variable_id": model_variables,
}
)
]

return (
DataRequirement(
source_type=SourceDatasetType.obs4MIPs,
filters=(FacetFilter(facets={"source_id": obs_sources, "variable_id": obs_variables}),),
group_by=("activity_id",),
),
DataRequirement(
source_type=SourceDatasetType.CMIP6,
filters=tuple(filters),
group_by=("source_id", "experiment_id", "member_id"),
),
)

self.data_requirements = _get_data_requirements(metrics_collection)

def build_cmd(self, definition: ExecutionDefinition) -> Iterable[str]:
"""
Build the command used to run the ENSO diagnostic for the given execution definition.

Parameters
----------
definition : ExecutionDefinition
The configuration to run the diagnostic on.

Returns
-------
:
The command and arguments for invoking the PMP ENSO driver.
"""
mc_name = self.metrics_collection

# ------------------------------------------------
# Get the input datasets information for the model
# ------------------------------------------------
input_datasets = definition.datasets[SourceDatasetType.CMIP6]
source_id = input_datasets["source_id"].unique()[0]
experiment_id = input_datasets["experiment_id"].unique()[0]
member_id = input_datasets["member_id"].unique()[0]
variable_ids = input_datasets["variable_id"].unique()
mod_run = f"{source_id}_{member_id}"

dict_mod: dict[str, dict[str, Any]] = {}
dict_mod[mod_run] = {}

def extract_variable(dc: DatasetCollection, variable: str) -> list[str]:
return dc.datasets[input_datasets["variable_id"] == variable]["path"].to_list() # type: ignore

# TO DO: Get the path to the files per variable
for variable in variable_ids:
list_files = extract_variable(input_datasets, variable)
list_areacella = extract_variable(input_datasets, "areacella")
list_sftlf = extract_variable(input_datasets, "sftlf")

if len(list_files) > 0:
dict_mod[mod_run][variable] = {
"path + filename": list_files,
"varname": variable,
"path + filename_area": list_areacella,
"areaname": "areacella",
"path + filename_landmask": list_sftlf,
"landmaskname": "sftlf",
}

# -------------------------------------------------------
# Get the input datasets information for the observations
# -------------------------------------------------------
reference_dataset = definition.datasets[SourceDatasetType.obs4MIPs]
reference_dataset_names = reference_dataset["source_id"].unique()

dict_obs: dict[str, dict[str, Any]] = {}

# TO DO: Get the path to the files per variable and per source
for obs_name in reference_dataset_names:
dict_obs[obs_name] = {}
for variable in variable_ids:
# Get the list of files for the current variable and observation source
list_files = reference_dataset.datasets[
(reference_dataset["variable_id"] == variable)
& (reference_dataset["source_id"] == obs_name)
]["path"].to_list()
# If the list is not empty, add it to the dictionary
if len(list_files) > 0:
dict_obs[obs_name][variable] = {
"path + filename": list_files,
"varname": variable,
}

# Create input directory
dictDatasets = {
"model": dict_mod,
"observations": dict_obs,
"metricsCollection": mc_name,
"experiment_id": experiment_id,
}

# Create JSON file for dictDatasets
json_file = os.path.join(
definition.output_directory, f"input_{mc_name}_{source_id}_{experiment_id}_{member_id}.json"
)
with open(json_file, "w") as f:
json.dump(dictDatasets, f, indent=4)
logger.debug(f"JSON file created: {json_file}")

driver_file = _get_resource("climate_ref_pmp.drivers", "enso_driver.py", use_resources=True)
return [
"python",
driver_file,
"--metrics_collection",
mc_name,
"--experiment_id",
experiment_id,
"--input_json_path",
json_file,
"--output_directory",
str(definition.output_directory),
]

def build_execution_result(self, definition: ExecutionDefinition) -> ExecutionResult:
"""
Build a diagnostic result from the output of the PMP driver

Parameters
----------
definition
Definition of the diagnostic execution

Returns
-------
Result of the diagnostic execution
"""
input_datasets = definition.datasets[SourceDatasetType.CMIP6]
source_id = input_datasets["source_id"].unique()[0]
experiment_id = input_datasets["experiment_id"].unique()[0]
member_id = input_datasets["member_id"].unique()[0]
mc_name = self.metrics_collection
pattern = f"{mc_name}_{source_id}_{experiment_id}_{member_id}"

results_files = list(definition.output_directory.glob(f"{pattern}.json"))
if len(results_files) != 1: # pragma: no cover
logger.warning(f"A single cmec output file not found: {results_files}")
return ExecutionResult.build_from_failure(definition)

# Find the other outputs
png_files = [definition.as_relative_path(f) for f in definition.output_directory.glob("*.png")]
data_files = [definition.as_relative_path(f) for f in definition.output_directory.glob("*.nc")]

cmec_output, cmec_metric = process_json_result(results_files[0], png_files, data_files)

return ExecutionResult.build_from_output_bundle(
definition,
cmec_output_bundle=cmec_output,
cmec_metric_bundle=cmec_metric,
)
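
For reference, a sketch of the input JSON that build_cmd writes before invoking enso_driver.py; the keys and nesting mirror dictDatasets above, while the model name, member, and file paths here are purely illustrative:

import json

# Hypothetical example content of input_ENSO_perf_<source>_<experiment>_<member>.json;
# only the structure is taken from build_cmd above, the values are made up.
dict_datasets = {
    "model": {
        "EXAMPLE-MODEL_r1i1p1f1": {
            "ts": {
                "path + filename": ["/path/to/ts_Amon_EXAMPLE-MODEL_historical_r1i1p1f1.nc"],
                "varname": "ts",
                "path + filename_area": ["/path/to/areacella_fx_EXAMPLE-MODEL.nc"],
                "areaname": "areacella",
                "path + filename_landmask": ["/path/to/sftlf_fx_EXAMPLE-MODEL.nc"],
                "landmaskname": "sftlf",
            }
        }
    },
    "observations": {
        "GPCP-2-3": {
            "pr": {
                "path + filename": ["/path/to/pr_GPCP-2-3.nc"],
                "varname": "pr",
            }
        }
    },
    "metricsCollection": "ENSO_perf",
    "experiment_id": "historical",
}
print(json.dumps(dict_datasets, indent=4))

# The command returned by build_cmd then resembles:
#   python enso_driver.py --metrics_collection ENSO_perf --experiment_id historical \
#       --input_json_path <output_directory>/input_ENSO_perf_..._historical_r1i1p1f1.json \
#       --output_directory <output_directory>
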
packages/climate-ref-pmp/src/climate_ref_pmp/diagnostics/variability_modes.py
@@ -36,18 +36,18 @@ def __init__(self, mode_id: str):
self.name = f"Extratropical modes of variability: {mode_id}"
self.slug = f"extratropical-modes-of-variability-{mode_id.lower()}"

def get_data_requirements(
def _get_data_requirements(
obs_source: str,
obs_variable: str,
cmip_variable: str,
model_variable: str,
extra_experiments: str | tuple[str, ...] | list[str] = (),
) -> tuple[DataRequirement, DataRequirement]:
filters = [
FacetFilter(
facets={
"frequency": "mon",
"experiment_id": ("historical", "hist-GHG", "piControl", *extra_experiments),
"variable_id": cmip_variable,
"variable_id": model_variable,
}
)
]
@@ -70,10 +70,10 @@ def get_data_requirements(

if self.mode_id in self.ts_modes:
self.parameter_file = "pmp_param_MoV-ts.py"
self.data_requirements = get_data_requirements("HadISST-1-1", "ts", "ts")
self.data_requirements = _get_data_requirements("HadISST-1-1", "ts", "ts")
elif self.mode_id in self.psl_modes:
self.parameter_file = "pmp_param_MoV-psl.py"
self.data_requirements = get_data_requirements("20CR", "psl", "psl", extra_experiments=("amip",))
self.data_requirements = _get_data_requirements("20CR", "psl", "psl", extra_experiments=("amip",))
else:
raise ValueError(
f"Unknown mode_id '{self.mode_id}'. Must be one of {self.ts_modes + self.psl_modes}"
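
To make the renamed helper's effect concrete, a sketch of the CMIP6 facet filter that the psl call above produces, assuming the hunk shown is the complete filter construction:

from climate_ref_core.datasets import FacetFilter

# Expansion of _get_data_requirements("20CR", "psl", "psl", extra_experiments=("amip",)):
# the model_variable parameter (formerly cmip_variable) feeds the variable_id facet.
filters = [
    FacetFilter(
        facets={
            "frequency": "mon",
            "experiment_id": ("historical", "hist-GHG", "piControl", "amip"),
            "variable_id": "psl",
        }
    )
]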