From db7a27de1c54e5edccb27f5a541b9b17e916cf4f Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 18 Feb 2025 09:23:02 +0100 Subject: [PATCH 1/8] wip: start of ai rewrite of esm_environment, probably nothing works out of the box --- setup.py | 3 + src/esm_environment/esm_environment.py | 864 ++++++------------ src/esm_environment/esm_environment_bkup.py | 633 +++++++++++++ templates/__init__.py | 0 templates/esm_environment/__init__.py | 0 templates/esm_environment/base.sh.j2 | 9 + templates/esm_environment/exports.sh.j2 | 13 + templates/esm_environment/headers/pbs.sh.j2 | 22 + templates/esm_environment/headers/slurm.sh.j2 | 22 + .../esm_environment/module_actions.sh.j2 | 22 + templates/esm_environment/unset.sh.j2 | 5 + 11 files changed, 1003 insertions(+), 590 deletions(-) create mode 100644 src/esm_environment/esm_environment_bkup.py create mode 100644 templates/__init__.py create mode 100644 templates/esm_environment/__init__.py create mode 100644 templates/esm_environment/base.sh.j2 create mode 100644 templates/esm_environment/exports.sh.j2 create mode 100644 templates/esm_environment/headers/pbs.sh.j2 create mode 100644 templates/esm_environment/headers/slurm.sh.j2 create mode 100644 templates/esm_environment/module_actions.sh.j2 create mode 100644 templates/esm_environment/unset.sh.j2 diff --git a/setup.py b/setup.py index 2b8b0dca7..4f4a1b14c 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ "PyGithub==1.55", "colorama==0.4.5", "coloredlogs==15.0.1", # NOTE(PG): Should be removed during cleanup for loguru instead + "dpath", "emoji==1.7.0", "f90nml==1.4.2", "gfw-creator==0.2.2", @@ -106,12 +107,14 @@ "esm_tools.namelists": "namelists", "esm_tools.runscripts": "runscripts", "esm_tools.couplings": "couplings", + "esm_tools.templates": "templates", }, package_data={ "esm_tools.configs": ["../configs/*"], "esm_tools.namelists": ["../namelists/*"], "esm_tools.runscripts": ["../runscripts/*"], "esm_tools.couplings": ["../couplings/*"], + 
"esm_tools.templates": ["../templates/*"], }, setup_requires=setup_requirements, test_suite="tests", diff --git a/src/esm_environment/esm_environment.py b/src/esm_environment/esm_environment.py index adf8b96a1..f2d95d8fd 100644 --- a/src/esm_environment/esm_environment.py +++ b/src/esm_environment/esm_environment.py @@ -1,633 +1,317 @@ -#!/usr/bin/env python """ -Main module for EsmEnvironment. +ESM-Tools Batch Script Template Module +==================================== + +A template-based script generation system for HPC environments using Jinja2. +Supports generation of both simple shell scripts and batch job scripts +(SLURM, PBS) with appropriate headers and environment setup. + +This module modernizes the environment generation approach in ESM-Tools by providing +a template-driven solution for creating batch environment setup scripts. It replaces the +previous string-based generation with a more maintainable and flexible template system. + +Examples +-------- +>>> from batch_script_template import BatchScriptTemplate +>>> config = { +... "sh_interpreter": "/bin/bash", +... "module_actions": ["load python", "load netcdf"], +... "export_vars": {"PATH": "/usr/local/bin:$PATH"} +... } +>>> batch = BatchScriptTemplate(config) +>>> script = batch.render() + +>>> # Batch job script +>>> config.update({ +... "batch_system": "slurm", +... "job": { +... "name": "test_job", +... "tasks": 4, +... "time": "01:00:00", +... } +... 
}) +>>> batch = BatchScriptTemplate(config) +>>> script = batch.render() + +See Also +-------- +jinja2.Environment : The underlying template engine used +Path : Python's pathlib.Path used for template directory management +dpath : Deep dictionary utilities for nested updates """ -import copy -import os -import warnings -import re -import sys +import importlib.resources as pkg_resources +from pathlib import Path +from typing import Any, Dict, Optional -import esm_parser +import dpath +from jinja2 import Environment, FileSystemLoader -###################################################################################### -########################### class "environment_infos" ################################ -###################################################################################### - -class EnvironmentInfos: +class BatchScriptTemplate: """ - The package ``esm_environment`` takes care of generating the environments for the - different HPCs supported by `ESM-Tools`. This is done through the use of the - ``EnvironmentInfos`` class inside the different `ESM-Tools` packages. - - For the correct definition of an environment for an HPC a `yaml` file for that - system needs to be included inside the ``esm_tools`` package inside the - ``configs/machines/`` folder (e.g. ``ollie.yaml``). This file should contain all - the required preset variables for that system and the environment variables - ``module_actions`` and ``export_vars``. - - By instancing the ``EnvironmentInfos`` class, the environment information for - the specified model or coupled setup is compiled and stored in - ``self.commands``. If there are environment variables inside the ``general`` - section, ``__init__`` will ignore the environment variables from the standalone - component files, and it will define the ``general.environment_changes`` for - each component of the setup. + A template engine for generating shell and batch job scripts. 
+ + This class provides functionality to render both simple shell scripts and + batch job scripts using Jinja2 templates. When configured for batch jobs, + it handles batch system headers (SLURM/PBS), and in all cases manages + module loading commands, environment variable exports, and variable unset commands. Parameters ---------- - run_or_compile : str - A string indicating whether ``EnvironmentInfos`` was instanced from a - compilation operation (``compiletime``) or a run (``runtime``). - complete_config : dict - Dictionary containing all the compiled information from the `yaml` files - needed for the current `ESM-Tools` operation. - model : string - Model for which the environment is required. If not defined, this method - will loop through all the available keys in ``complete_config``. + config : Dict[str, Any] + Configuration dictionary containing script setup parameters. + Expected keys include: + - sh_interpreter : str + Shell interpreter to use (default: /bin/bash) + - batch_system : str, optional + Either 'slurm' or 'pbs'. If not provided, generates a simple + shell script without batch headers. + - job : dict, optional + Job-specific settings (name, tasks, time, etc.). + Required if batch_system is specified. + - module_actions : list + List of module commands to execute + - export_vars : dict + Dictionary of environment variables to export + - unset_vars : list + List of variables to unset + template_dir : Path, optional + Directory containing custom Jinja2 templates for script generation. + If not provided, uses the default templates from the package. + + Attributes + ---------- + env : jinja2.Environment + The Jinja2 environment instance used for template rendering. + config : Dict[str, Any] + The configuration dictionary used for rendering templates. 
+ batch_system : Optional[str] + The batch system type ('slurm', 'pbs', or None for simple scripts) + + Notes + ----- + The template directory should contain the following structure: + templates/ + ├── base.sh.j2 # Base shell script template + ├── headers/ + │ ├── slurm.sh.j2 # SLURM header template + │ └── pbs.sh.j2 # PBS header template + ├── module_actions.sh.j2 # Module loading template + ├── exports.sh.j2 # Environment exports template + └── unset.sh.j2 # Unset variables template + + Examples + -------- + Simple shell script: + >>> config = { + ... "module_actions": ["load intel"], + ... "export_vars": {"OMP_NUM_THREADS": "4"} + ... } + >>> batch = BatchScriptTemplate(config) + >>> print(batch.render()) + #!/bin/bash -l + + module load intel + export OMP_NUM_THREADS=4 + + SLURM job script: + >>> config.update({ + ... "batch_system": "slurm", + ... "job": { + ... "name": "test_job", + ... "tasks": 4, + ... "time": "01:00:00", + ... "partition": "compute" + ... } + ... }) + >>> batch = BatchScriptTemplate(config) + >>> print(batch.render()) + #!/bin/bash -l + #SBATCH --job-name=test_job + #SBATCH --ntasks=4 + #SBATCH --time=01:00:00 + #SBATCH --partition=compute + + module load intel + export OMP_NUM_THREADS=4 + + See Also + -------- + jinja2.Environment : The template engine used by this class + dpath : Library used for deep dictionary updates """ - def __init__(self, run_or_compile, complete_config=None, model=None): - # Ensure local copy of complete config to avoid mutating it... 
(facepalm) - complete_config = copy.deepcopy(complete_config) - # Load computer dictionary or initialize it from the correct machine file - if complete_config and "computer" in complete_config: - self.config = complete_config["computer"] - else: - self.machine_file = esm_parser.determine_computer_yaml_from_hostname() - self.config = esm_parser.yaml_file_to_dict(self.machine_file) - esm_parser.basic_choose_blocks(self.config, self.config) - esm_parser.recursive_run_function( - [], - self.config, - "atomic", - esm_parser.find_variable, - self.config, - [], - True, - ) - - # Add_s can only be inside choose_ blocks in the machine file - for entry in ["add_module_actions", "add_export_vars", "add_unset_vars"]: - if entry in self.config: - del self.config[entry] - - # Load the general environments if any - self.general_environment(complete_config, run_or_compile) - - # If the model is defined during the instantiation of the class (e.g. - # during esm_master with a coupled setup), get the environment for that - # model. Otherwise, loop through all the keys of the complete_config dictionary - if model: - self.apply_config_changes(run_or_compile, complete_config, model) - else: - for model in complete_config: - self.apply_config_changes(run_or_compile, complete_config, model) - - # Add the ENVIRONMENT_SET_BY_ESMTOOLS into the exports - self.add_esm_var() - - # Define the environment commands for the script - self.commands = self.get_shell_commands() - - def add_esm_var(self): - """ - Adds the ENVIRONMENT_SET_BY_ESMTOOLS=TRUE to the config, for later - dumping to the shell script. - """ - - if "export_vars" in self.config: - self.config["export_vars"]["ENVIRONMENT_SET_BY_ESMTOOLS"] = "TRUE" - else: - self.config["export_vars"] = {"ENVIRONMENT_SET_BY_ESMTOOLS": "TRUE"} - - def apply_config_changes(self, run_or_compile, config, model): - """ - Calls ``apply_model_changes`` with the selected configuration for the - ``model``. 
- """ - - self.apply_model_changes( - model, run_or_compile=run_or_compile, modelconfig=config[model] - ) - - def apply_model_changes(self, model, run_or_compile="runtime", modelconfig=None): + def __init__( + self, config: Dict[str, Any], template_dir: Optional[Path] = None + ) -> None: """ - Applies the ``environment_changes``, ``compiletime_environment_changes``, - and/or ``runtime_environment_changes`` to the environment configuration of the - ``model`` component. Note that ``model`` can be either a component (e.g. - ``fesom``) or ``general``. + Initialize the BatchScriptTemplate with configuration and optional templates. Parameters ---------- - model : str - Name of the component for which changes will apply. - run_or_compile : str - A string indicating whether ``EnvironmentInfos`` was instanced from a - compilation operation (``compiletime``) or a run (``runtime``). - modelconfig : dict - Information compiled from the `yaml` files for this specific component. + config : Dict[str, Any] + Configuration dictionary for script setup. + template_dir : Path, optional + Path to custom template directory. If None, uses package defaults. """ + self.config = config + self.batch_system = config.get("batch_system", "").lower() or None - # Merge whatever is relevant to this environment operation (either compile or - # run) to ``environment_changes``, taking care of solving possible ``choose_`` - # blocks - thesechanges = run_or_compile + "_environment_changes" - if thesechanges in modelconfig: - # kh 16.09.20 the machine name is already handled here - # additionally handle different versions of the model (i.e. - # choose_version...) 
for each machine if this is possible here in a more - # generic way, it can be refactored - if "choose_version" in modelconfig[thesechanges]: - if "version" in modelconfig: - if ( - modelconfig["version"] - in modelconfig[thesechanges]["choose_version"] - ): - for k, v in modelconfig[thesechanges]["choose_version"][ - modelconfig["version"] - ].items(): - # kh 16.09.20 move up one level and replace default - modelconfig[thesechanges][k] = v - del modelconfig[thesechanges]["choose_version"] - - # Perform the merging of the environment dictionaries - if "environment_changes" in modelconfig: - modelconfig["environment_changes"].update(modelconfig[thesechanges]) - else: - modelconfig["environment_changes"] = modelconfig[thesechanges] - - if "environment_changes" in modelconfig: - for entry in ["add_module_actions", "add_export_vars", "add_unset_vars"]: - # Initialize the environment variables - if not entry in self.config: - if entry in ["add_module_actions", "add_unset_vars"]: - self.config[entry] = [] - elif entry == "add_export_vars": - self.config[entry] = {} - - if entry == "add_export_vars": - # Transform any list whose name contains add_export_vars into a - # dictionary (machine-file export_vars are from now on always a - # dictionary but add_export_vars of components and setups are - # allowed to be lists for retro-compatibility) - self.turn_add_export_vars_to_dict(modelconfig, entry) - - # Merge the ``environment_changes`` into the general ``config`` - self.config.update(modelconfig["environment_changes"]) - # Change any ``choose_computer.*`` block in ``config`` to ``choose_*`` - self.remove_computer_from_choose(self.config) - - # Resolve ``choose_`` blocks - esm_parser.basic_choose_blocks(self.config, self.config) - - # Remove the environment variables from the config - for entry in ["add_module_actions", "add_export_vars", "add_unset_vars"]: - if entry in self.config: - del self.config[entry] - - def turn_add_export_vars_to_dict(self, modelconfig, entry): - 
""" - Turns the given ``entry`` in ``modelconfig`` (normally ``add_export_vars``) into - a dictionary, if it is not a dictionary yet. This function is necessary for - retro-compatibility of configuration files having ``add_export_vars`` defined as - list of strings, instead of as dictionaries. - - Parameters - ---------- - modelconfig : dict - Information compiled from the `yaml` files for this specific component. - entry : str - The environment variable (originally developed for ``add_export_vars``) to - be turned into a dictionary. - """ - - # Find the variables whose names contains the entry (e.g. add_export_vars) - path_sep = "," - entry_paths = esm_parser.find_key( - modelconfig["environment_changes"], - entry, - paths2finds=[], - sep=path_sep, - ) - # Loop through the variables - for entry_path in entry_paths: - # Split the path and define the export_dict dictionary that links to the - # current entry. Later, if the content of export_dict is a list it will be - # turned into a dictionary itself - path_to_var = entry_path.split(path_sep) - path_to_var = [esm_parser.convert(leaf) for leaf in path_to_var] - if len(path_to_var) > 1: - export_dict = esm_parser.find_value_for_nested_key( - modelconfig["environment_changes"], - path_to_var[-2], - path_to_var[:-2], - ) - else: - export_dict = modelconfig["environment_changes"] - # Get the value of export_dict - export_vars = export_dict[path_to_var[-1]] - - # If export_vars is a list transform it into a dictionary - if isinstance(export_vars, list): - self.env_list_to_dict(export_dict, path_to_var[-1]) - - def env_list_to_dict(self, export_dict, key): - """ - Transforms lists in ``export_dict`` in dictionaries. This allows to add lists of - ``export_vars`` to the machine-defined ``export_vars`` that should always be a - dictionary. Note that lists are always added at the end of the ``export_vars``, - if you want to edit variables of an already existing dictionary make your - ``export_var`` be a dictionary. 
- - Avoids destroying repetitions of elements by adding indexes to the keys of the - newly transformed dictionary, for example: - - .. code-block::yaml - your_model: - environment_changes: - add_export_vars: - - 'SOMETHING=dummy' - - 'somethingelse=dummy' - - 'SOMETHING=dummy' - - The ``export_dict[key]`` (where ``key = add_export_vars``) will be transformed - in this function from being a list to be the following dictionary: - - .. code-block::yaml - 'SOMETHING=dummy[(0)][(list)]': 'SOMETHING=dummy' - 'somethingelse=dummy[(0)][(list)]': 'somethingelse=dummy' - 'SOMETHING=dummy[(1)][(list)]': "SOMETHING=dummy' - - Note that, once all the environments are resolved, and before writing the - exports in the bash files, the ``export_vars`` dictionary is transformed again - into a list and the indexes and ``[(list)]`` strings are removed. - - Parameters - ---------- - export_dict : dict - ``export_var`` dictionary which value is a list. This list is transformed - into a dictionary. - key : str - The key to the value. - """ - # Load the value - export_vars = export_dict[key] - # Check if the value is a list TODO: logging - if not isinstance(export_vars, list): - print( - f"The only reason to use this function is if {key} is a list, and it " - + "is not in this case..." 
+ if self.batch_system and self.batch_system not in ["slurm", "pbs"]: + raise ValueError( + "batch_system, if specified, must be either 'slurm' or 'pbs'" ) - sys.exit(1) - - # Loop through the elements of the list - new_export_vars = {} - for var in export_vars: - # Initialize index - index = 0 - while True: - # If the key with the current index already exists move the move the - # index forward - if var + f"[({index})][(list)]" in new_export_vars: - index += 1 - # If the key with the current index does not exist yet, add the element - # to the dictionary - else: - new_export_vars[f"{var}[({index})][(list)]"] = var - break - - # Redefined the transformed dictionary - export_dict[key] = new_export_vars - - def general_environment(self, complete_config, run_or_compile): - """ - Checks if there are ``environment_changes`` inside the ``general`` section, and - if that is the case, ignore the changes loaded from the component files. - - Parameters - ---------- - complete_config : dict - Dictionary containing all the compiled information from the `yaml` files - needed for the current `ESM-Tools` operation. - run_or_compile : str - A string indicating whether ``EnvironmentInfos`` was instanced from a - compilation operation (``compiletime``) or a run (``runtime``). - """ - # If the general section exists load the general environments - general_env = False - if "general" in complete_config: - # Is it a coupled setup? 
- coupled_setup = complete_config["general"].get("coupled_setup", False) - - # Check if a general setup environment exists that will overwrite the - # component setups - if coupled_setup and ( - "compiletime_environment_changes" in complete_config["general"] - or "runtime_environment_changes" in complete_config["general"] - or "environment_changes" in complete_config["general"] - ): # TODO: do this if the model include other models and the environment is - # labelled as priority over the other models environment (OIFS case) - general_env = True - self.apply_config_changes(run_or_compile, complete_config, "general") - - # If there is a general environment remove all the model specific environments - # defined in the model files and preserve only the model specific environments - # that are explicitly defined in the setup file - if general_env: - self.load_component_env_changes_only_in_setup(complete_config) - - def load_component_env_changes_only_in_setup(self, complete_config): - """ - Removes all the model specific environments defined in the component files and - preserve only the component-specific environments that are explicitly defined in - the setup file. - - Parameters - ---------- - complete_config : dict - Dictionary containing all the compiled information from the `yaml` files - needed for the current `ESM-Tools` operation. 
- """ - - # Get necessary variables - setup = complete_config.get("general", {}).get("model", None) - version = str(complete_config.get("general", {}).get("version", None)) - models = complete_config.get("general", {}).get("models", None) - # Check for errors TODO: logging - if not models: - print( - "Use the EnvironmentInfos.load_component_env_changes_only_in_setup " - + "method only if complete_config has a general chapter that includes " - + "a models list" + if self.batch_system and "job" not in config: + raise ValueError( + "job configuration is required when batch_system is specified" ) - sys.exit(1) - # Find the setup file - include_path, needs_load = esm_parser.look_for_file( - setup, - setup + "-" + version, + if template_dir is None: + # Use the package's default templates + with pkg_resources.path("esm_tools.templates", "") as template_path: + template_dir = template_path / "esm_environment" + + self.env = Environment( + loader=FileSystemLoader(template_dir), trim_blocks=True, lstrip_blocks=True ) - # If setup file not found throw and error TODO: logging - if not include_path: - print(f"File for {setup}-{version} not found") - sys.exit(1) - # Load the file TODO: logging - if needs_load: - setup_config = esm_parser.yaml_file_to_dict(include_path) - else: - print(f"A setup needs to load a file so this line shouldn't be reached") - sys.exit(1) - - # Add the attachment files (e.g. the environment variables can be in a - # further_reading file) - for attachment in esm_parser.CONFIGS_TO_ALWAYS_ATTACH_AND_REMOVE: - # Add the attachment file chapters (e.g. there is a further_reading chapter - # at the same level of general and the components) - esm_parser.attach_to_config_and_remove(setup_config, attachment) - # Add the attachment files in each chapter (i.e. in general, components, - # etc.) 
- for component in list(setup_config): - esm_parser.attach_to_config_and_remove( - setup_config[component], - attachment, - ) - # Define the possible environment variables - environment_vars = [ - "environment_changes", - "compiletime_environment_changes", - "runtime_environment_changes", - ] - # Loop through the models - for model in models: - # Sanity check TODO: logging - if model not in complete_config: - print(f"The chapter {model} does not exist in complete_config") - sys.exit(1) - # Load the configuration of this model - model_config = complete_config[model] - # Loop through the possible environment variables - for env_var in environment_vars: - # If the environment variable exists replace it with the one defined in - # the setup file for that model: - # 1. Delete the variable - if env_var in model_config: - del model_config[env_var] - # 2. Redefine the variable - if env_var in setup_config.get(model, {}): - # Solve any unresolved variables in the reloaded setup environment - # TODO: change this to be out of the loop using the method - # ``model_config.finalize()``, currently not working due to - # a problem with the dates - esm_parser.recursive_run_function( - [], - setup_config[model][env_var], - "atomic", - esm_parser.find_variable, - complete_config, - {}, - {}, - ) - # Actually redefine the variable - model_config[env_var] = setup_config[model][env_var] - - def replace_model_dir(self, model_dir): + def render(self, include_set_e: bool = True) -> str: """ - Replaces any instances of ${model_dir} in the config section - "export_vars" with the argument + Render a complete script, optionally including batch system headers. 
Parameters ---------- - model_dir : str - The replacement string for ${model_dir} - """ - for entry in ["export_vars"]: - if entry in self.config: - newlist = [] - for line in self.config[entry]: - newline = line.replace("${model_dir}", model_dir) - newlist.append(newline) - self.config[entry] = newlist - - def get_shell_commands(self): - """ - Gathers module actions and export variables from the config to a list, - prepending appropriate shell command words (e.g. module and export). - - If the ``export_vars`` dictionary contains variables with repetition - indexes (``[(int)]``) or ``[(list)]``, those are removed before returning the - command list. + include_set_e : bool, optional + Whether to include 'set -e' in the script (default: True) Returns ------- - environment : list - A list of the environment operations, to be used in the compilation and run - scripts. - """ - - environment = [] - # Fix for seb-wahl's hack via source - if self.config.get("general_actions") is not None: - for action in self.config["general_actions"]: - environment.append(action) - # Write module actions - if self.config.get("module_actions") is not None: - for action in self.config["module_actions"]: - # seb-wahl: workaround to allow source ... to be added to the batch header - # until a proper solution is available. Required with FOCI - if action.startswith("source"): - environment.append(action) - else: - environment.append(f"module {action}") - # Write Spack actions - if self.config.get("spack_actions") is not None: - for action in self.config["spack_actions"]: - environment.append(f"spack {action}") - # Add an empty string as a newline: - environment.append("") - if self.config.get("export_vars") is not None: - for var in self.config["export_vars"]: - # If export_vars is a dictionary - if isinstance(self.config["export_vars"], dict): - # If the variable is a dictionary itself (e.g. 
"AWI_FESOM_YAML" - # in fesom-1.4) add the contents of the dictionary as the value of - # the exported variable inside '""' - if isinstance(self.config["export_vars"][var], dict): - key = var - value = self.config["export_vars"][key] - environment.append(f"export {key}='{str(value)}'") - # If the value of the variable is not a dictionary - else: - key = var - value = self.config["export_vars"][key] - # Define the pattern for indexes [(int)] - ipattern = "\[+\(\d+\)+\]$" - # If the variable was added as a list produce the correct string - if key.endswith("[(list)]"): - key = key.replace("[(list)]", "") - environment.append(f"export {value}") - # If the variable contained a repetition index, remove it - elif re.search(ipattern, key): - environment.append( - f"export {re.sub(ipattern, '', key)}={str(value)}" - ) - # It it is a normal variable return the export command - else: - environment.append(f"export {key}={str(value)}") - # If export_vars is a list append the export command (this should not - # happen anymore as the export_vars in the machine files should be all - # defined now as dictionaries - else: - environment.append("export {str(var)}") - environment.append("") - # Write the unset commands - if self.config.get("unset_vars") is not None: - for var in self.config["unset_vars"]: - environment.append(f"unset {var}") - - return environment - - def write_dummy_script(self, include_set_e=True): + str + The rendered script as a string. + + Examples + -------- + >>> config = { + ... "sh_interpreter": "/bin/bash", + ... "module_actions": ["load intel/2020"], + ... "export_vars": {"MPI_ROOT": "/opt/mpi"} + ... } + >>> script_template = BatchScriptTemplate(config) + >>> script = script_template.render() + + Notes + ----- + The rendered script will include sections for: + 1. Shell interpreter specification + 2. Batch system headers (if batch_system is specified) + 3. Module loading commands + 4. Environment variable exports + 5. 
Variable unset commands + + Raises + ------ + jinja2.TemplateNotFound + If the required template files are not found in template_dir + jinja2.TemplateError + If there are syntax errors in the templates """ - Writes a dummy script containing only the header information, module - commands, and export variables. The actual compile/configure commands - are added later. + script_parts = [] - Parameters - ---------- - include_set_e : bool - Default to True, whether or not to include a ``set -e`` at the - beginning of the script. This causes the shell to stop as soon as - an error is encountered. - """ - # Check for sh_interpreter - if "sh_interpreter" not in self.config: - print('WARNING: "sh_interpreter" not defined in the machine yaml') - with open("dummy_script.sh", "w") as script_file: - # Write the file headings - script_file.write( - f'#!{self.config.get("sh_interpreter", "/bin/bash")} -l\n' - ) - script_file.write( - "# Dummy script generated by esm-tools, to be removed later: \n" + # Add batch system header if specified + if self.batch_system: + header_template = self.env.get_template( + f"headers/{self.batch_system}.sh.j2" ) - if include_set_e: - script_file.write("set -e\n") + header = header_template.render(job=self.config.get("job", {})) + script_parts.append(header) + + # Add environment setup + env_template = self.env.get_template("base.sh.j2") + environment = env_template.render( + sh_interpreter=self.config.get("sh_interpreter", "/bin/bash"), + include_set_e=include_set_e, + module_actions=self.config.get("module_actions", []), + export_vars=self.config.get("export_vars", {}), + unset_vars=self.config.get("unset_vars", []), + general_actions=self.config.get("general_actions", []), + spack_actions=self.config.get("spack_actions", []), + ) - # Write the module and export commands - for command in self.commands: - script_file.write(f"{command}\n") - script_file.write("\n") + # Since batch system was added at the very beginning, we need to make sure + # that 
the shell interpreter line is still at the top: + environment = environment.split("\n") + shell_interpreter = environment.pop(0) # Remove shell interpreter line + script_parts.append(environment) - def remove_computer_from_choose(self, chapter): - """ - Recursively remove ``computer.`` from all the `choose_` keys. + # Add shell interpreter line at the beginning + script_parts.insert(0, shell_interpreter) - Parameters - ---------- - chapter : dict - Dictionary to search for ``choose_computer.`` blocks. - """ - all_keys = list(chapter.keys()) - for key in all_keys: - if isinstance(key, str) and "choose_computer." in key: - newkey = key.replace("computer.", "") - chapter[newkey] = chapter[key] - del chapter[key] - key = newkey - if isinstance(chapter[key], dict): - self.remove_computer_from_choose(chapter[key]) - - @staticmethod - def cleanup_dummy_script(): - """ - Removes the ``dummy_script.sh`` if it exists. - """ - try: - os.remove("dummy_script.sh") - except OSError: - print("No file dummy_script.sh there; nothing to do...") + return "\n".join(script_parts) - @staticmethod - def add_commands(commands, name): + def update_config(self, new_config: Dict[str, Any], separator: str = "/") -> None: """ - Writes all commands in a list to a file named ``_script.sh``, - located in the current working directory. The header from this script - is read from ``dummy_script.sh``, also in the current working - directory. + This method uses dpath to perform deep dictionary updates, allowing for nested + key updates without overwriting entire subdictionaries. Parameters ---------- - commands : list of str - List of the commands to write to the file after the header - name : str - Name of the script, generally something like ``comp_echam-6.3.05`` - - Returns - ------- - str : - ``name`` + "_script.sh" + new_config : Dict[str, Any] + New configuration values to update or add. Can contain nested paths. 
+ separator : str, optional + Separator to use for nested paths in dpath (default: '/') + + Examples + -------- + Simple update: + >>> batch = BatchScriptTemplate({"sh_interpreter": "/bin/bash"}) + >>> batch.update_config({"module_actions": ["load intel"]}) + + Nested update: + >>> # Initial configuration: + >>> initial_config = { + ... "export_vars": { + ... "PATH": "/usr/bin", + ... "nested": {"key": "old_value"} + ... } + ... } + >>> batch = BatchScriptTemplate(initial_config) + >>> print("Initial config:", batch.config) + Initial config: { + 'export_vars': { + 'PATH': '/usr/bin', + 'nested': {'key': 'old_value'} + } + } + + >>> # Perform nested updates: + >>> batch.update_config({ + ... "export_vars/nested/key": "new_value", + ... "export_vars/PATH": "/new/path" + ... }) + >>> print("Updated config:", batch.config) + Updated config: { + 'export_vars': { + 'PATH': '/new/path', + 'nested': {'key': 'new_value'} + } + } + + Notes + ----- + The update is performed in-place on the instance's config dictionary. + Nested paths in the new_config can be specified using the separator. 
+ + See Also + -------- + dpath : Library used for the deep dictionary updates """ - if commands: - with open(f"{name}_script.sh", "w") as newfile: - with open("dummy_script.sh", "r") as dummy_file: - newfile.write(dummy_file.read()) - for command in commands: - newfile.write(f"{command}\n") - return f"{name}_script.sh" - - def output(self): - self.config.yaml_dump() - - -class environment_infos(EnvironmentInfos): - def __init__(self, *args, **kwargs): - warnings.warn( - "Please change your code to use EnvironmentInfos!", - DeprecationWarning, - stacklevel=2, - ) - super(environment_infos, self).__init__(*args, **kwargs) + dpath.util.merge(self.config, new_config, separator=separator) + # Update batch_system if it was changed + if "batch_system" in new_config: + self.batch_system = self.config.get("batch_system", "").lower() or None + if self.batch_system and self.batch_system not in ["slurm", "pbs"]: + raise ValueError( + "batch_system, if specified, must be either 'slurm' or 'pbs'" + ) diff --git a/src/esm_environment/esm_environment_bkup.py b/src/esm_environment/esm_environment_bkup.py new file mode 100644 index 000000000..adf8b96a1 --- /dev/null +++ b/src/esm_environment/esm_environment_bkup.py @@ -0,0 +1,633 @@ +#!/usr/bin/env python +""" +Main module for EsmEnvironment. +""" + +import copy +import os +import warnings +import re +import sys + +import esm_parser + +###################################################################################### +########################### class "environment_infos" ################################ +###################################################################################### + + +class EnvironmentInfos: + """ + The package ``esm_environment`` takes care of generating the environments for the + different HPCs supported by `ESM-Tools`. This is done through the use of the + ``EnvironmentInfos`` class inside the different `ESM-Tools` packages. 
+ + For the correct definition of an environment for an HPC a `yaml` file for that + system needs to be included inside the ``esm_tools`` package inside the + ``configs/machines/`` folder (e.g. ``ollie.yaml``). This file should contain all + the required preset variables for that system and the environment variables + ``module_actions`` and ``export_vars``. + + By instancing the ``EnvironmentInfos`` class, the environment information for + the specified model or coupled setup is compiled and stored in + ``self.commands``. If there are environment variables inside the ``general`` + section, ``__init__`` will ignore the environment variables from the standalone + component files, and it will define the ``general.environment_changes`` for + each component of the setup. + + Parameters + ---------- + run_or_compile : str + A string indicating whether ``EnvironmentInfos`` was instanced from a + compilation operation (``compiletime``) or a run (``runtime``). + complete_config : dict + Dictionary containing all the compiled information from the `yaml` files + needed for the current `ESM-Tools` operation. + model : string + Model for which the environment is required. If not defined, this method + will loop through all the available keys in ``complete_config``. + """ + + def __init__(self, run_or_compile, complete_config=None, model=None): + # Ensure local copy of complete config to avoid mutating it... 
(facepalm) + complete_config = copy.deepcopy(complete_config) + # Load computer dictionary or initialize it from the correct machine file + if complete_config and "computer" in complete_config: + self.config = complete_config["computer"] + else: + self.machine_file = esm_parser.determine_computer_yaml_from_hostname() + self.config = esm_parser.yaml_file_to_dict(self.machine_file) + esm_parser.basic_choose_blocks(self.config, self.config) + esm_parser.recursive_run_function( + [], + self.config, + "atomic", + esm_parser.find_variable, + self.config, + [], + True, + ) + + # Add_s can only be inside choose_ blocks in the machine file + for entry in ["add_module_actions", "add_export_vars", "add_unset_vars"]: + if entry in self.config: + del self.config[entry] + + # Load the general environments if any + self.general_environment(complete_config, run_or_compile) + + # If the model is defined during the instantiation of the class (e.g. + # during esm_master with a coupled setup), get the environment for that + # model. Otherwise, loop through all the keys of the complete_config dictionary + if model: + self.apply_config_changes(run_or_compile, complete_config, model) + else: + for model in complete_config: + self.apply_config_changes(run_or_compile, complete_config, model) + + # Add the ENVIRONMENT_SET_BY_ESMTOOLS into the exports + self.add_esm_var() + + # Define the environment commands for the script + self.commands = self.get_shell_commands() + + def add_esm_var(self): + """ + Adds the ENVIRONMENT_SET_BY_ESMTOOLS=TRUE to the config, for later + dumping to the shell script. + """ + + if "export_vars" in self.config: + self.config["export_vars"]["ENVIRONMENT_SET_BY_ESMTOOLS"] = "TRUE" + else: + self.config["export_vars"] = {"ENVIRONMENT_SET_BY_ESMTOOLS": "TRUE"} + + def apply_config_changes(self, run_or_compile, config, model): + """ + Calls ``apply_model_changes`` with the selected configuration for the + ``model``. 
+ """ + + self.apply_model_changes( + model, run_or_compile=run_or_compile, modelconfig=config[model] + ) + + def apply_model_changes(self, model, run_or_compile="runtime", modelconfig=None): + """ + Applies the ``environment_changes``, ``compiletime_environment_changes``, + and/or ``runtime_environment_changes`` to the environment configuration of the + ``model`` component. Note that ``model`` can be either a component (e.g. + ``fesom``) or ``general``. + + Parameters + ---------- + model : str + Name of the component for which changes will apply. + run_or_compile : str + A string indicating whether ``EnvironmentInfos`` was instanced from a + compilation operation (``compiletime``) or a run (``runtime``). + modelconfig : dict + Information compiled from the `yaml` files for this specific component. + """ + + # Merge whatever is relevant to this environment operation (either compile or + # run) to ``environment_changes``, taking care of solving possible ``choose_`` + # blocks + thesechanges = run_or_compile + "_environment_changes" + if thesechanges in modelconfig: + # kh 16.09.20 the machine name is already handled here + # additionally handle different versions of the model (i.e. + # choose_version...) 
for each machine if this is possible here in a more + # generic way, it can be refactored + if "choose_version" in modelconfig[thesechanges]: + if "version" in modelconfig: + if ( + modelconfig["version"] + in modelconfig[thesechanges]["choose_version"] + ): + for k, v in modelconfig[thesechanges]["choose_version"][ + modelconfig["version"] + ].items(): + # kh 16.09.20 move up one level and replace default + modelconfig[thesechanges][k] = v + del modelconfig[thesechanges]["choose_version"] + + # Perform the merging of the environment dictionaries + if "environment_changes" in modelconfig: + modelconfig["environment_changes"].update(modelconfig[thesechanges]) + else: + modelconfig["environment_changes"] = modelconfig[thesechanges] + + if "environment_changes" in modelconfig: + for entry in ["add_module_actions", "add_export_vars", "add_unset_vars"]: + # Initialize the environment variables + if not entry in self.config: + if entry in ["add_module_actions", "add_unset_vars"]: + self.config[entry] = [] + elif entry == "add_export_vars": + self.config[entry] = {} + + if entry == "add_export_vars": + # Transform any list whose name contains add_export_vars into a + # dictionary (machine-file export_vars are from now on always a + # dictionary but add_export_vars of components and setups are + # allowed to be lists for retro-compatibility) + self.turn_add_export_vars_to_dict(modelconfig, entry) + + # Merge the ``environment_changes`` into the general ``config`` + self.config.update(modelconfig["environment_changes"]) + # Change any ``choose_computer.*`` block in ``config`` to ``choose_*`` + self.remove_computer_from_choose(self.config) + + # Resolve ``choose_`` blocks + esm_parser.basic_choose_blocks(self.config, self.config) + + # Remove the environment variables from the config + for entry in ["add_module_actions", "add_export_vars", "add_unset_vars"]: + if entry in self.config: + del self.config[entry] + + def turn_add_export_vars_to_dict(self, modelconfig, entry): + 
""" + Turns the given ``entry`` in ``modelconfig`` (normally ``add_export_vars``) into + a dictionary, if it is not a dictionary yet. This function is necessary for + retro-compatibility of configuration files having ``add_export_vars`` defined as + list of strings, instead of as dictionaries. + + Parameters + ---------- + modelconfig : dict + Information compiled from the `yaml` files for this specific component. + entry : str + The environment variable (originally developed for ``add_export_vars``) to + be turned into a dictionary. + """ + + # Find the variables whose names contains the entry (e.g. add_export_vars) + path_sep = "," + entry_paths = esm_parser.find_key( + modelconfig["environment_changes"], + entry, + paths2finds=[], + sep=path_sep, + ) + # Loop through the variables + for entry_path in entry_paths: + # Split the path and define the export_dict dictionary that links to the + # current entry. Later, if the content of export_dict is a list it will be + # turned into a dictionary itself + path_to_var = entry_path.split(path_sep) + path_to_var = [esm_parser.convert(leaf) for leaf in path_to_var] + if len(path_to_var) > 1: + export_dict = esm_parser.find_value_for_nested_key( + modelconfig["environment_changes"], + path_to_var[-2], + path_to_var[:-2], + ) + else: + export_dict = modelconfig["environment_changes"] + # Get the value of export_dict + export_vars = export_dict[path_to_var[-1]] + + # If export_vars is a list transform it into a dictionary + if isinstance(export_vars, list): + self.env_list_to_dict(export_dict, path_to_var[-1]) + + def env_list_to_dict(self, export_dict, key): + """ + Transforms lists in ``export_dict`` in dictionaries. This allows to add lists of + ``export_vars`` to the machine-defined ``export_vars`` that should always be a + dictionary. Note that lists are always added at the end of the ``export_vars``, + if you want to edit variables of an already existing dictionary make your + ``export_var`` be a dictionary. 
+ + Avoids destroying repetitions of elements by adding indexes to the keys of the + newly transformed dictionary, for example: + + .. code-block:: yaml + your_model: + environment_changes: + add_export_vars: + - 'SOMETHING=dummy' + - 'somethingelse=dummy' + - 'SOMETHING=dummy' + + The ``export_dict[key]`` (where ``key = add_export_vars``) will be transformed + in this function from a list into the following dictionary: + + .. code-block:: yaml + 'SOMETHING=dummy[(0)][(list)]': 'SOMETHING=dummy' + 'somethingelse=dummy[(0)][(list)]': 'somethingelse=dummy' + 'SOMETHING=dummy[(1)][(list)]': 'SOMETHING=dummy' + + Note that, once all the environments are resolved, and before writing the + exports in the bash files, the ``export_vars`` dictionary is transformed again + into a list and the indexes and ``[(list)]`` strings are removed. + + Parameters + ---------- + export_dict : dict + ``export_var`` dictionary whose value is a list. This list is transformed + into a dictionary. + key : str + The key to the value. + """ + # Load the value + export_vars = export_dict[key] + # Check if the value is a list TODO: logging + if not isinstance(export_vars, list): + print( + f"The only reason to use this function is if {key} is a list, and it " + + "is not in this case..."
+ ) + sys.exit(1) + + # Loop through the elements of the list + new_export_vars = {} + for var in export_vars: + # Initialize index + index = 0 + while True: + # If the key with the current index already exists, move the + # index forward + if var + f"[({index})][(list)]" in new_export_vars: + index += 1 + # If the key with the current index does not exist yet, add the element + # to the dictionary + else: + new_export_vars[f"{var}[({index})][(list)]"] = var + break + + # Redefine the entry as the transformed dictionary + export_dict[key] = new_export_vars + + def general_environment(self, complete_config, run_or_compile): + """ + Checks if there are ``environment_changes`` inside the ``general`` section, and + if that is the case, ignore the changes loaded from the component files. + + Parameters + ---------- + complete_config : dict + Dictionary containing all the compiled information from the `yaml` files + needed for the current `ESM-Tools` operation. + run_or_compile : str + A string indicating whether ``EnvironmentInfos`` was instanced from a + compilation operation (``compiletime``) or a run (``runtime``). + """ + + # If the general section exists load the general environments + general_env = False + if "general" in complete_config: + # Is it a coupled setup?
+ coupled_setup = complete_config["general"].get("coupled_setup", False) + + # Check if a general setup environment exists that will overwrite the + # component setups + if coupled_setup and ( + "compiletime_environment_changes" in complete_config["general"] + or "runtime_environment_changes" in complete_config["general"] + or "environment_changes" in complete_config["general"] + ): # TODO: do this if the model include other models and the environment is + # labelled as priority over the other models environment (OIFS case) + general_env = True + self.apply_config_changes(run_or_compile, complete_config, "general") + + # If there is a general environment remove all the model specific environments + # defined in the model files and preserve only the model specific environments + # that are explicitly defined in the setup file + if general_env: + self.load_component_env_changes_only_in_setup(complete_config) + + def load_component_env_changes_only_in_setup(self, complete_config): + """ + Removes all the model specific environments defined in the component files and + preserve only the component-specific environments that are explicitly defined in + the setup file. + + Parameters + ---------- + complete_config : dict + Dictionary containing all the compiled information from the `yaml` files + needed for the current `ESM-Tools` operation. 
+ """ + + # Get necessary variables + setup = complete_config.get("general", {}).get("model", None) + version = str(complete_config.get("general", {}).get("version", None)) + models = complete_config.get("general", {}).get("models", None) + # Check for errors TODO: logging + if not models: + print( + "Use the EnvironmentInfos.load_component_env_changes_only_in_setup " + + "method only if complete_config has a general chapter that includes " + + "a models list" + ) + sys.exit(1) + + # Find the setup file + include_path, needs_load = esm_parser.look_for_file( + setup, + setup + "-" + version, + ) + # If setup file not found throw an error TODO: logging + if not include_path: + print(f"File for {setup}-{version} not found") + sys.exit(1) + # Load the file TODO: logging + if needs_load: + setup_config = esm_parser.yaml_file_to_dict(include_path) + else: + print(f"A setup needs to load a file so this line shouldn't be reached") + sys.exit(1) + + # Add the attachment files (e.g. the environment variables can be in a + # further_reading file) + for attachment in esm_parser.CONFIGS_TO_ALWAYS_ATTACH_AND_REMOVE: + # Add the attachment file chapters (e.g. there is a further_reading chapter + # at the same level of general and the components) + esm_parser.attach_to_config_and_remove(setup_config, attachment) + # Add the attachment files in each chapter (i.e. in general, components, + # etc.)
+ for component in list(setup_config): + esm_parser.attach_to_config_and_remove( + setup_config[component], + attachment, + ) + + # Define the possible environment variables + environment_vars = [ + "environment_changes", + "compiletime_environment_changes", + "runtime_environment_changes", + ] + # Loop through the models + for model in models: + # Sanity check TODO: logging + if model not in complete_config: + print(f"The chapter {model} does not exist in complete_config") + sys.exit(1) + # Load the configuration of this model + model_config = complete_config[model] + # Loop through the possible environment variables + for env_var in environment_vars: + # If the environment variable exists replace it with the one defined in + # the setup file for that model: + # 1. Delete the variable + if env_var in model_config: + del model_config[env_var] + # 2. Redefine the variable + if env_var in setup_config.get(model, {}): + # Solve any unresolved variables in the reloaded setup environment + # TODO: change this to be out of the loop using the method + # ``model_config.finalize()``, currently not working due to + # a problem with the dates + esm_parser.recursive_run_function( + [], + setup_config[model][env_var], + "atomic", + esm_parser.find_variable, + complete_config, + {}, + {}, + ) + # Actually redefine the variable + model_config[env_var] = setup_config[model][env_var] + + def replace_model_dir(self, model_dir): + """ + Replaces any instances of ${model_dir} in the config section + "export_vars" with the argument + + Parameters + ---------- + model_dir : str + The replacement string for ${model_dir} + """ + for entry in ["export_vars"]: + if entry in self.config: + newlist = [] + for line in self.config[entry]: + newline = line.replace("${model_dir}", model_dir) + newlist.append(newline) + self.config[entry] = newlist + + def get_shell_commands(self): + """ + Gathers module actions and export variables from the config to a list, + prepending appropriate shell command 
words (e.g. module and export). + + If the ``export_vars`` dictionary contains variables with repetition + indexes (``[(int)]``) or ``[(list)]``, those are removed before returning the + command list. + + Returns + ------- + environment : list + A list of the environment operations, to be used in the compilation and run + scripts. + """ + + environment = [] + # Fix for seb-wahl's hack via source + if self.config.get("general_actions") is not None: + for action in self.config["general_actions"]: + environment.append(action) + # Write module actions + if self.config.get("module_actions") is not None: + for action in self.config["module_actions"]: + # seb-wahl: workaround to allow source ... to be added to the batch header + # until a proper solution is available. Required with FOCI + if action.startswith("source"): + environment.append(action) + else: + environment.append(f"module {action}") + # Write Spack actions + if self.config.get("spack_actions") is not None: + for action in self.config["spack_actions"]: + environment.append(f"spack {action}") + # Add an empty string as a newline: + environment.append("") + if self.config.get("export_vars") is not None: + for var in self.config["export_vars"]: + # If export_vars is a dictionary + if isinstance(self.config["export_vars"], dict): + # If the variable is a dictionary itself (e.g. 
"AWI_FESOM_YAML" + # in fesom-1.4) add the contents of the dictionary as the value of + # the exported variable inside '""' + if isinstance(self.config["export_vars"][var], dict): + key = var + value = self.config["export_vars"][key] + environment.append(f"export {key}='{str(value)}'") + # If the value of the variable is not a dictionary + else: + key = var + value = self.config["export_vars"][key] + # Define the pattern for indexes [(int)] + ipattern = "\[+\(\d+\)+\]$" + # If the variable was added as a list produce the correct string + if key.endswith("[(list)]"): + key = key.replace("[(list)]", "") + environment.append(f"export {value}") + # If the variable contained a repetition index, remove it + elif re.search(ipattern, key): + environment.append( + f"export {re.sub(ipattern, '', key)}={str(value)}" + ) + # It it is a normal variable return the export command + else: + environment.append(f"export {key}={str(value)}") + # If export_vars is a list append the export command (this should not + # happen anymore as the export_vars in the machine files should be all + # defined now as dictionaries + else: + environment.append("export {str(var)}") + environment.append("") + # Write the unset commands + if self.config.get("unset_vars") is not None: + for var in self.config["unset_vars"]: + environment.append(f"unset {var}") + + return environment + + def write_dummy_script(self, include_set_e=True): + """ + Writes a dummy script containing only the header information, module + commands, and export variables. The actual compile/configure commands + are added later. + + Parameters + ---------- + include_set_e : bool + Default to True, whether or not to include a ``set -e`` at the + beginning of the script. This causes the shell to stop as soon as + an error is encountered. 
+ """ + # Check for sh_interpreter + if "sh_interpreter" not in self.config: + print('WARNING: "sh_interpreter" not defined in the machine yaml') + with open("dummy_script.sh", "w") as script_file: + # Write the file headings + script_file.write( + f'#!{self.config.get("sh_interpreter", "/bin/bash")} -l\n' + ) + script_file.write( + "# Dummy script generated by esm-tools, to be removed later: \n" + ) + if include_set_e: + script_file.write("set -e\n") + + # Write the module and export commands + for command in self.commands: + script_file.write(f"{command}\n") + script_file.write("\n") + + def remove_computer_from_choose(self, chapter): + """ + Recursively remove ``computer.`` from all the `choose_` keys. + + Parameters + ---------- + chapter : dict + Dictionary to search for ``choose_computer.`` blocks. + """ + all_keys = list(chapter.keys()) + for key in all_keys: + if isinstance(key, str) and "choose_computer." in key: + newkey = key.replace("computer.", "") + chapter[newkey] = chapter[key] + del chapter[key] + key = newkey + if isinstance(chapter[key], dict): + self.remove_computer_from_choose(chapter[key]) + + @staticmethod + def cleanup_dummy_script(): + """ + Removes the ``dummy_script.sh`` if it exists. + """ + try: + os.remove("dummy_script.sh") + except OSError: + print("No file dummy_script.sh there; nothing to do...") + + @staticmethod + def add_commands(commands, name): + """ + Writes all commands in a list to a file named ``_script.sh``, + located in the current working directory. The header from this script + is read from ``dummy_script.sh``, also in the current working + directory. 
+ + Parameters + ---------- + commands : list of str + List of the commands to write to the file after the header + name : str + Name of the script, generally something like ``comp_echam-6.3.05`` + + Returns + ------- + str : + ``name`` + "_script.sh" + """ + if commands: + with open(f"{name}_script.sh", "w") as newfile: + with open("dummy_script.sh", "r") as dummy_file: + newfile.write(dummy_file.read()) + for command in commands: + newfile.write(f"{command}\n") + return f"{name}_script.sh" + + def output(self): + self.config.yaml_dump() + + +class environment_infos(EnvironmentInfos): + def __init__(self, *args, **kwargs): + warnings.warn( + "Please change your code to use EnvironmentInfos!", + DeprecationWarning, + stacklevel=2, + ) + super(environment_infos, self).__init__(*args, **kwargs) diff --git a/templates/__init__.py b/templates/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/templates/esm_environment/__init__.py b/templates/esm_environment/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/templates/esm_environment/base.sh.j2 b/templates/esm_environment/base.sh.j2 new file mode 100644 index 000000000..05c5b26f4 --- /dev/null +++ b/templates/esm_environment/base.sh.j2 @@ -0,0 +1,9 @@ +#!{{ sh_interpreter }} -l +{% if include_set_e %} +set -e +{% endif %} + +{# Include other template components #} +{% include 'module_actions.sh.j2' %} +{% include 'exports.sh.j2' %} +{% include 'unset.sh.j2' %} \ No newline at end of file diff --git a/templates/esm_environment/exports.sh.j2 b/templates/esm_environment/exports.sh.j2 new file mode 100644 index 000000000..6b938fcdf --- /dev/null +++ b/templates/esm_environment/exports.sh.j2 @@ -0,0 +1,13 @@ +{% if export_vars is defined %} +{% for key, value in export_vars.items() %} +{% if value is mapping %} +export {{ key }}='{{ value|tojson }}' +{% else %} +{% if key.endswith('[(list)]') %} +export {{ value }} +{% else %} +export {{ key|replace(r'\[\(\d+\)\]', 
'')|replace('[(list)]', '') }}={{ value }} +{% endif %} +{% endif %} +{% endfor %} +{% endif %} \ No newline at end of file diff --git a/templates/esm_environment/headers/pbs.sh.j2 b/templates/esm_environment/headers/pbs.sh.j2 new file mode 100644 index 000000000..1adc9702d --- /dev/null +++ b/templates/esm_environment/headers/pbs.sh.j2 @@ -0,0 +1,22 @@ +{# PBS Header Template #} +{% if job %} +#PBS -N {{ job.name }} +{% if job.tasks %} +#PBS -l nodes=1:ppn={{ job.tasks }} +{% endif %} +{% if job.time %} +#PBS -l walltime={{ job.time }} +{% endif %} +{% if job.partition %} +#PBS -q {{ job.partition }} +{% endif %} +{% if job.account %} +#PBS -A {{ job.account }} +{% endif %} +{% if job.mail_type %} +#PBS -m {{ 'abe' if job.mail_type == 'ALL' else job.mail_type }} +{% endif %} +{% if job.mail_user %} +#PBS -M {{ job.mail_user }} +{% endif %} +{% endif %} \ No newline at end of file diff --git a/templates/esm_environment/headers/slurm.sh.j2 b/templates/esm_environment/headers/slurm.sh.j2 new file mode 100644 index 000000000..60a1ba00a --- /dev/null +++ b/templates/esm_environment/headers/slurm.sh.j2 @@ -0,0 +1,22 @@ +{# SLURM Header Template #} +{% if job %} +#SBATCH --job-name={{ job.name }} +{% if job.tasks %} +#SBATCH --ntasks={{ job.tasks }} +{% endif %} +{% if job.time %} +#SBATCH --time={{ job.time }} +{% endif %} +{% if job.partition %} +#SBATCH --partition={{ job.partition }} +{% endif %} +{% if job.account %} +#SBATCH --account={{ job.account }} +{% endif %} +{% if job.mail_type %} +#SBATCH --mail-type={{ job.mail_type }} +{% endif %} +{% if job.mail_user %} +#SBATCH --mail-user={{ job.mail_user }} +{% endif %} +{% endif %} \ No newline at end of file diff --git a/templates/esm_environment/module_actions.sh.j2 b/templates/esm_environment/module_actions.sh.j2 new file mode 100644 index 000000000..771120eff --- /dev/null +++ b/templates/esm_environment/module_actions.sh.j2 @@ -0,0 +1,22 @@ +{# Handle module loading #} +{% if general_actions is defined %} +{% 
for action in general_actions %} +{{ action }} +{% endfor %} +{% endif %} + +{% if module_actions is defined %} +{% for action in module_actions %} +{% if action.startswith('source') %} +{{ action }} +{% else %} +module {{ action }} +{% endif %} +{% endfor %} +{% endif %} + +{% if spack_actions is defined %} +{% for action in spack_actions %} +spack {{ action }} +{% endfor %} +{% endif %} \ No newline at end of file diff --git a/templates/esm_environment/unset.sh.j2 b/templates/esm_environment/unset.sh.j2 new file mode 100644 index 000000000..fc1f884cd --- /dev/null +++ b/templates/esm_environment/unset.sh.j2 @@ -0,0 +1,5 @@ +{% if unset_vars is defined %} +{% for var in unset_vars %} +unset {{ var }} +{% endfor %} +{% endif %} \ No newline at end of file From a65765289c30d81d692005c9cf0bd0b1de74a2f9 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 18 Feb 2025 10:09:29 +0100 Subject: [PATCH 2/8] wip: allows for adding actual commands to the end of the generated script file, usage in esm-master --- src/esm_environment/esm_environment.py | 11 ++++++++++- src/esm_master/task.py | 15 +++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/esm_environment/esm_environment.py b/src/esm_environment/esm_environment.py index f2d95d8fd..764aed595 100644 --- a/src/esm_environment/esm_environment.py +++ b/src/esm_environment/esm_environment.py @@ -177,7 +177,9 @@ def __init__( loader=FileSystemLoader(template_dir), trim_blocks=True, lstrip_blocks=True ) - def render(self, include_set_e: bool = True) -> str: + def render( + self, include_set_e: bool = True, tail_commands: Optional[list] = None + ) -> str: """ Render a complete script, optionally including batch system headers. 
@@ -185,6 +187,8 @@ def render(self, include_set_e: bool = True) -> str: ---------- include_set_e : bool, optional Whether to include 'set -e' in the script (default: True) + tail_commands : list, optional + Commands to add at the end of the rendered template Returns ------- @@ -217,6 +221,8 @@ def render(self, include_set_e: bool = True) -> str: jinja2.TemplateError If there are syntax errors in the templates """ + if tail_commands is None: + tail_commands = [] script_parts = [] # Add batch system header if specified @@ -248,6 +254,9 @@ def render(self, include_set_e: bool = True) -> str: # Add shell interpreter line at the beginning script_parts.insert(0, shell_interpreter) + for tail_command in tail_commands: + script_parts.append(tail_command) + return "\n".join(script_parts) def update_config(self, new_config: Dict[str, Any], separator: str = "/") -> None: diff --git a/src/esm_master/task.py b/src/esm_master/task.py index 949d1a188..d8d1a8208 100644 --- a/src/esm_master/task.py +++ b/src/esm_master/task.py @@ -69,8 +69,8 @@ def __init__(self, raw, setup_info, vcs, general, complete_config, parsed_args): ) if kind == "components": - self.env = esm_environment.esm_environment.EnvironmentInfos( - "compiletime", complete_config, model + self.env = esm_environment.esm_environment.BatchScriptTemplate( + complete_config ) else: self.env = None @@ -429,10 +429,13 @@ def generate_task_script(self): for task in self.ordered_tasks: if task.todo in ["conf", "comp"]: if task.package.kind == "components": - task.env.write_dummy_script() - newfile = task.env.add_commands( - task.package.command_list[task.todo], task.raw_name - ) + with open(f"{task.raw_name}_script.sh", "w") as newfile: + newfile.write( + task.env.render( + tail_commands=task.package.command_list[task.todo] + ) + ) + newfile = f"{task.raw_name}_script.sh" if os.path.isfile(newfile): os.chmod(newfile, 0o755) From 164ad76c199f6c6967f6f1550f202260fcb3ee3a Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 18 
Feb 2025 15:07:30 +0100 Subject: [PATCH 3/8] wip --- setup.py | 1 + src/esm_environment/esm_environment.py | 150 ++++++++++++++++-- .../esm_environment/templates}/__init__.py | 0 .../esm_environment/templates}/base.sh.j2 | 0 .../esm_environment/templates}/exports.sh.j2 | 12 +- .../templates}/headers/pbs.sh.j2 | 0 .../templates}/headers/slurm.sh.j2 | 0 .../templates}/module_actions.sh.j2 | 0 .../esm_environment/templates}/unset.sh.j2 | 0 src/esm_master/task.py | 18 ++- templates/esm_environment/__init__.py | 0 11 files changed, 158 insertions(+), 23 deletions(-) rename {templates => src/esm_environment/templates}/__init__.py (100%) rename {templates/esm_environment => src/esm_environment/templates}/base.sh.j2 (100%) rename {templates/esm_environment => src/esm_environment/templates}/exports.sh.j2 (52%) rename {templates/esm_environment => src/esm_environment/templates}/headers/pbs.sh.j2 (100%) rename {templates/esm_environment => src/esm_environment/templates}/headers/slurm.sh.j2 (100%) rename {templates/esm_environment => src/esm_environment/templates}/module_actions.sh.j2 (100%) rename {templates/esm_environment => src/esm_environment/templates}/unset.sh.j2 (100%) delete mode 100644 templates/esm_environment/__init__.py diff --git a/setup.py b/setup.py index 4f4a1b14c..ba1fad380 100644 --- a/setup.py +++ b/setup.py @@ -100,6 +100,7 @@ "esm_tools.namelists", "esm_tools.runscripts", "esm_tools.couplings", + "esm_tools.templates", ], package_dir={ "": "src", diff --git a/src/esm_environment/esm_environment.py b/src/esm_environment/esm_environment.py index 764aed595..31763bcd4 100644 --- a/src/esm_environment/esm_environment.py +++ b/src/esm_environment/esm_environment.py @@ -1,6 +1,6 @@ """ ESM-Tools Batch Script Template Module -==================================== +====================================== A template-based script generation system for HPC environments using Jinja2. 
Supports generation of both simple shell scripts and batch job scripts @@ -40,7 +40,10 @@ dpath : Deep dictionary utilities for nested updates """ -import importlib.resources as pkg_resources +# NOTE(PG): I'm not sure exactly, but pkg_resources is outdated +# Instead, it is recommended to use importlib! +import importlib.resources +# import importlib.resources as pkg_resources from pathlib import Path from typing import Any, Dict, Optional @@ -48,6 +51,66 @@ from jinja2 import Environment, FileSystemLoader +def clean_env_var_name(name: str) -> str: + """ + Remove numbered suffixes from environment variable names. + + Removes suffixes of the form [(number)] from environment variable names. + This is useful when dealing with duplicate environment variables that + have been numbered for uniqueness but need to be exported without the suffix. + + Parameters + ---------- + name : str + The environment variable name to clean + + Returns + ------- + str + The cleaned environment variable name + + Examples + -------- + >>> clean_env_var_name("PATH[(1)]") + 'PATH' + >>> clean_env_var_name("PYTHONPATH[(42)]") + 'PYTHONPATH' + >>> clean_env_var_name("NORMAL_VAR") + 'NORMAL_VAR' + >>> clean_env_var_name("MY_LIST[(list)]") + 'MY_LIST' + + Notes + ----- + This function specifically handles: + - Environment variables with numeric suffixes: VAR[(number)] + - Environment variables with list suffix: VAR[(list)] + - Regular environment variables (returns unchanged) + + The template that uses this function will skip empty or ``None`` values:: + + >>> config = { + ... "export_vars": { + ... "PATH[(1)]": "/usr/bin", + ... "EMPTY_VAR": "", + ... "NONE_VAR": None, + ... "ZERO_VAR": 0, + ... "LIST_VAR[(list)]": "", + ... "VALID_VAR": "value" + ... } + ... 
} + + Will generate:: + + export PATH=/usr/bin + export ZERO_VAR=0 + export VALID_VAR=value + """ + if "[(" in name and ")]" in name: + return name.split("[(")[0] + return name + + class BatchScriptTemplate: """ A template engine for generating shell and batch job scripts. @@ -67,9 +130,6 @@ class BatchScriptTemplate: - batch_system : str, optional Either 'slurm' or 'pbs'. If not provided, generates a simple shell script without batch headers. - - job : dict, optional - Job-specific settings (name, tasks, time, etc.). - Required if batch_system is specified. - module_actions : list List of module commands to execute - export_vars : dict @@ -163,19 +223,67 @@ def __init__( "batch_system, if specified, must be either 'slurm' or 'pbs'" ) - if self.batch_system and "job" not in config: - raise ValueError( - "job configuration is required when batch_system is specified" - ) + # FIXME(PG): This is a nice idea, but it doesn't fit here... + # if self.batch_system and "job" not in config: + # raise ValueError( + # "job configuration is required when batch_system is specified" + # ) if template_dir is None: # Use the package's default templates - with pkg_resources.path("esm_tools.templates", "") as template_path: - template_dir = template_path / "esm_environment" + # NOTE(PG): Old implementation with pkg_resources is outdated! 
+ # with pkg_resources.path("esm_tools.templates", "") as template_path: + # template_dir = template_path / "esm_environment" + with importlib.resources.as_file( + importlib.resources.files("esm_environment") + ) as template_path: + template_dir = template_path / "templates" self.env = Environment( loader=FileSystemLoader(template_dir), trim_blocks=True, lstrip_blocks=True ) + # Add custom jinja2 filters: + self.env.filters["clean_env_var"] = clean_env_var_name + + # Post inits: clean up and finialize the configuration: + self._post_init_add_esm_var() + + @classmethod + def from_complete_config( + cls, config: dict, template_dir: Optional[Path] = None + ) -> "BatchScriptTemplate": + """Initializes a BatchScriptTemplate from a full simulation config + + This extracts only the computer key of a complete simulation config + and uses it to create the BatchScriptTemplate. Mirrors old behaviour. + + Raises + ------ + KeyError : + If you don't have a computer in your complete config, this + constructor won't work! + """ + if "computer" in config: + return cls(config["computer"], template_dir=template_dir) + raise KeyError( + "Must give a complete simulation config containing a ``computer`` key!" + ) + + def _post_init_add_esm_var(self): + """ + Adds the ENVIRONMENT_SET_BY_ESMTOOLS=TRUE to the config, for later + dumping to the shell script. + + Mutates + ------- + self.config : + A key "export_vars" is either initialized as a dictionary, or extended + by ENVIRONMENT_SET_BY_ESMTOOLS=True. 
+ """ + if "export_vars" in self.config: + self.config["export_vars"]["ENVIRONMENT_SET_BY_ESMTOOLS"] = "TRUE" + else: + self.config["export_vars"] = {"ENVIRONMENT_SET_BY_ESMTOOLS": "TRUE"} def render( self, include_set_e: bool = True, tail_commands: Optional[list] = None @@ -249,7 +357,8 @@ def render( # that the shell interpreter line is still at the top: environment = environment.split("\n") shell_interpreter = environment.pop(0) # Remove shell interpreter line - script_parts.append(environment) + for env_statement in environment: + script_parts.append(env_statement) # Add shell interpreter line at the beginning script_parts.insert(0, shell_interpreter) @@ -257,6 +366,7 @@ def render( for tail_command in tail_commands: script_parts.append(tail_command) + breakpoint() return "\n".join(script_parts) def update_config(self, new_config: Dict[str, Any], separator: str = "/") -> None: @@ -324,3 +434,19 @@ def update_config(self, new_config: Dict[str, Any], separator: str = "/") -> Non raise ValueError( "batch_system, if specified, must be either 'slurm' or 'pbs'" ) + + def write_dummy_script(self, include_set_e: bool = True) -> None: + """ + Writes a dummy script containing only the header information, module + commands, and export variables. The actual compile/configure commands + are added later. + + Parameters + ---------- + include_set_e : bool + Default to True, whether or not to include a ``set -e`` at the + beginning of the script. This causes the shell to stop as soon as + an error is encountered. 
+ """ + with open("dummy_script.sh", "w") as script_file: + script_file.write(self.render(include_set_e=include_set_e)) diff --git a/templates/__init__.py b/src/esm_environment/templates/__init__.py similarity index 100% rename from templates/__init__.py rename to src/esm_environment/templates/__init__.py diff --git a/templates/esm_environment/base.sh.j2 b/src/esm_environment/templates/base.sh.j2 similarity index 100% rename from templates/esm_environment/base.sh.j2 rename to src/esm_environment/templates/base.sh.j2 diff --git a/templates/esm_environment/exports.sh.j2 b/src/esm_environment/templates/exports.sh.j2 similarity index 52% rename from templates/esm_environment/exports.sh.j2 rename to src/esm_environment/templates/exports.sh.j2 index 6b938fcdf..77fab64a6 100644 --- a/templates/esm_environment/exports.sh.j2 +++ b/src/esm_environment/templates/exports.sh.j2 @@ -1,13 +1,19 @@ -{% if export_vars is defined %} +{% if export_vars is defined and export_vars %} {% for key, value in export_vars.items() %} +{% if value is not none %} {% if value is mapping %} export {{ key }}='{{ value|tojson }}' {% else %} {% if key.endswith('[(list)]') %} +{% if value %} export {{ value }} +{% endif %} {% else %} -export {{ key|replace(r'\[\(\d+\)\]', '')|replace('[(list)]', '') }}={{ value }} +{% if value|string %} +export {{ key|clean_env_var }}={{ value }} +{% endif %} +{% endif %} {% endif %} {% endif %} {% endfor %} -{% endif %} \ No newline at end of file +{% endif %} diff --git a/templates/esm_environment/headers/pbs.sh.j2 b/src/esm_environment/templates/headers/pbs.sh.j2 similarity index 100% rename from templates/esm_environment/headers/pbs.sh.j2 rename to src/esm_environment/templates/headers/pbs.sh.j2 diff --git a/templates/esm_environment/headers/slurm.sh.j2 b/src/esm_environment/templates/headers/slurm.sh.j2 similarity index 100% rename from templates/esm_environment/headers/slurm.sh.j2 rename to src/esm_environment/templates/headers/slurm.sh.j2 diff --git 
a/templates/esm_environment/module_actions.sh.j2 b/src/esm_environment/templates/module_actions.sh.j2 similarity index 100% rename from templates/esm_environment/module_actions.sh.j2 rename to src/esm_environment/templates/module_actions.sh.j2 diff --git a/templates/esm_environment/unset.sh.j2 b/src/esm_environment/templates/unset.sh.j2 similarity index 100% rename from templates/esm_environment/unset.sh.j2 rename to src/esm_environment/templates/unset.sh.j2 diff --git a/src/esm_master/task.py b/src/esm_master/task.py index d8d1a8208..0c3734f7d 100644 --- a/src/esm_master/task.py +++ b/src/esm_master/task.py @@ -59,7 +59,7 @@ def __init__(self, raw, setup_info, vcs, general, complete_config, parsed_args): self.package = software_package( (kind, model, version), setup_info, vcs, general ) - else: # tupel: + elif isinstance(raw, tuple): (self.todo, kind, model, version, self.only_subtask) = raw self.package = software_package( (kind, model, version), setup_info, vcs, general @@ -67,14 +67,16 @@ def __init__(self, raw, setup_info, vcs, general, complete_config, parsed_args): self.raw_name = setup_info.assemble_raw_name( self.todo, kind, model, version ) + else: + raise TypeError(f"Unsupported type {type(raw)}") if kind == "components": - self.env = esm_environment.esm_environment.BatchScriptTemplate( + self.env = esm_environment.BatchScriptTemplate.from_complete_config( complete_config ) else: self.env = None - if not self.todo in setup_info.meta_todos: + if self.todo not in setup_info.meta_todos: self.check_if_target(setup_info) self.subtasks = self.get_subtasks( @@ -558,7 +560,7 @@ def num_of_get_commands(self): if command in subtask.package.command_list.get("get", []): self.num_get_commands += 1 break - return self.num_get_commands # (Not strictly needed, but might be nice?) + return self.num_get_commands # (Not strictly needed, but might be nice?) 
def get_repo_properties_from_command(self, command): """ @@ -597,7 +599,7 @@ def get_repo_properties_from_command(self, command): for subtask in self.subtasks: get_commands = subtask.package.command_list.get("get") if command in get_commands: - repo ["package"]= subtask.package + repo["package"] = subtask.package repo["is_repo_operation"] = True self.executed_repo_commands.append(command) if self.num_get_commands == len(self.executed_repo_commands): @@ -622,7 +624,7 @@ def add_repo_error(self, command, repo, error): of the ``error`` parameter if it the destination does not exist Note - ---- + ---- Since the ``error`` is caught from an ``except`` of a ``subprocess`` command, the actual error that occurred during the execution of the ``command`` by ``subprocess.run`` is not caught. That means that we cannot evaluate which @@ -697,7 +699,7 @@ def report_repo_errors(self): "access and reading permissions to the repositories listed below. If " "you don't, contact the person in charge of that particular repository " "(see ``contact`` in the repository list below).\n\n" - f"Repositories with problems:\n{problematic_repos}" + f"Repositories with problems:\n{problematic_repos}", ) def report_destination_path_errors(self): @@ -723,5 +725,5 @@ def report_destination_path_errors(self): "model consider deleting that folder. If instead, you want to keep " "that, you can use other esm_master commands (e.g. esm_master " "comp--). 
Destinations already present:\n" - f"{problematic_destinations}\n" + f"{problematic_destinations}\n", ) diff --git a/templates/esm_environment/__init__.py b/templates/esm_environment/__init__.py deleted file mode 100644 index e69de29bb..000000000 From a606f60b9b38838c91fe262ede041bcefb3693fe Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 18 Feb 2025 15:17:59 +0100 Subject: [PATCH 4/8] wip: kinda does what it should --- src/esm_environment/templates/exports.sh.j2 | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/esm_environment/templates/exports.sh.j2 b/src/esm_environment/templates/exports.sh.j2 index 77fab64a6..d150959e0 100644 --- a/src/esm_environment/templates/exports.sh.j2 +++ b/src/esm_environment/templates/exports.sh.j2 @@ -1,19 +1,7 @@ -{% if export_vars is defined and export_vars %} +{% if export_vars is defined %} {% for key, value in export_vars.items() %} -{% if value is not none %} -{% if value is mapping %} -export {{ key }}='{{ value|tojson }}' -{% else %} -{% if key.endswith('[(list)]') %} {% if value %} -export {{ value }} -{% endif %} -{% else %} -{% if value|string %} export {{ key|clean_env_var }}={{ value }} {% endif %} -{% endif %} -{% endif %} -{% endif %} {% endfor %} {% endif %} From 24ccef071458352f29e07b802095e06e40d44c44 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 18 Feb 2025 15:22:11 +0100 Subject: [PATCH 5/8] wip: fesom 2.6 compiles successfully on albedo --- src/esm_environment/esm_environment.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/esm_environment/esm_environment.py b/src/esm_environment/esm_environment.py index 31763bcd4..0ae9c0df9 100644 --- a/src/esm_environment/esm_environment.py +++ b/src/esm_environment/esm_environment.py @@ -366,7 +366,6 @@ def render( for tail_command in tail_commands: script_parts.append(tail_command) - breakpoint() return "\n".join(script_parts) def update_config(self, new_config: Dict[str, Any], separator: str = "/") -> None: From 
4be4fc292aca110d20bb8cd66ed61538c07b190c Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Wed, 19 Feb 2025 07:33:57 +0100 Subject: [PATCH 6/8] wip --- src/esm_environment/esm_environment.py | 64 ++++++++++++++++++++++-- src/esm_environment/templates/base.sh.j2 | 4 +- src/esm_master/task.py | 5 ++ 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/src/esm_environment/esm_environment.py b/src/esm_environment/esm_environment.py index 0ae9c0df9..b7a4563d5 100644 --- a/src/esm_environment/esm_environment.py +++ b/src/esm_environment/esm_environment.py @@ -40,12 +40,13 @@ dpath : Deep dictionary utilities for nested updates """ +import copy # NOTE(PG): I'm not sure exactly, but pkg_resources is outdated # Instead, it is recommended to use importlib! import importlib.resources # import importlib.resources as pkg_resources from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple import dpath from jinja2 import Environment, FileSystemLoader @@ -53,11 +54,13 @@ def clean_env_var_name(name: str) -> str: """ - Remove numbered suffixes from environment variable names. + Remove enclosed suffixes from environment variable names. - Removes suffixes of the form [(number)] from environment variable names. + Removes suffixes of the form [(ANYTHING)] from environment variable names. This is useful when dealing with duplicate environment variables that - have been numbered for uniqueness but need to be exported without the suffix. + have been numbered for uniqueness but need to be exported without the suffix, + and has most often been used in the form [(1)], numbering certain export variables + to avoid overriding them. 
Parameters ---------- @@ -111,6 +114,57 @@ def clean_env_var_name(name: str) -> str: return name +def _find_keys_with_substring(d: dict[Any, Any], substring: str) -> list[str]: + """Find pahs to keys containing a given substring in a nested dictionary using dpath + + Parameters + ---------- + d : dict + The dictionary to search in + substring : str + What to look for + + Returns + ------- + list[str] : + List of keys + """ + matches = [] + for path in dpath.search(d, "**", yielded=True): + key_path, _ = path + if substring in key_path.split("/")[-1]: # Check the last part of the path + matches.append(key_path) + return matches + + +def finialize_config_with_env_changes( + config: dict, setting: Optional[str] = None +) -> dict: + if setting not in ["runtime", "compiletime"]: + raise ValueError( + f"Can only finialize a configuration for ``runtime`` or ``compiletime``, got {setting=}!" + ) + config = copy.deepcopy(config) + environment_changes_keys = _find_keys_with_substring(config, "environment_changes") + xtime_keys = _find_keys_with_substring(config, f"{setting}_environment_changes") + all_change_keys = set((environment_changes_keys, xtime_keys)) + if all_change_keys: + breakpoint() + return config + + +class EnvironmentChangeManager: + _VALID_PHASES: Tuple(str, ...) = ( + "runtime", + "compiletime", + ) + """tuple : The phases which the EnvironmentChangeManager can be usefully applied to""" + + def __init__(self, execution_phase: str): + if execution_phase not in self._SUPPORTED_JOB_TIMES: + raise TypeError(f"Please use one of {_VALID_PHASES} for the {__class__}") + + class BatchScriptTemplate: """ A template engine for generating shell and batch job scripts. @@ -170,7 +224,7 @@ class BatchScriptTemplate: ... 
} >>> batch = BatchScriptTemplate(config) >>> print(batch.render()) - #!/bin/bash -l + #!/bin/bash module load intel export OMP_NUM_THREADS=4 diff --git a/src/esm_environment/templates/base.sh.j2 b/src/esm_environment/templates/base.sh.j2 index 05c5b26f4..8f2fe9dfd 100644 --- a/src/esm_environment/templates/base.sh.j2 +++ b/src/esm_environment/templates/base.sh.j2 @@ -1,4 +1,4 @@ -#!{{ sh_interpreter }} -l +#!{{ sh_interpreter }} {% if include_set_e %} set -e {% endif %} @@ -6,4 +6,4 @@ set -e {# Include other template components #} {% include 'module_actions.sh.j2' %} {% include 'exports.sh.j2' %} -{% include 'unset.sh.j2' %} \ No newline at end of file +{% include 'unset.sh.j2' %} diff --git a/src/esm_master/task.py b/src/esm_master/task.py index 0c3734f7d..b4fce34fb 100644 --- a/src/esm_master/task.py +++ b/src/esm_master/task.py @@ -71,6 +71,11 @@ def __init__(self, raw, setup_info, vcs, general, complete_config, parsed_args): raise TypeError(f"Unsupported type {type(raw)}") if kind == "components": + # Find environment changes for compiletime + complete_config = esm_environment.finialize_config_with_env_changes( + complete_config, "compiletime" + ) + breakpoint() self.env = esm_environment.BatchScriptTemplate.from_complete_config( complete_config ) From 07b5b1976b85e121bc9d287fcf93febe6354e320 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Wed, 19 Feb 2025 09:41:52 +0100 Subject: [PATCH 7/8] wip: cleanup of the in-code examples and refactoring of ScriptTemplate to also have subclasses for SLURMBatchScriptTemplate and PBSBatchScriptTemplate --- src/esm_environment/esm_environment.py | 241 +++++++++++-------------- 1 file changed, 107 insertions(+), 134 deletions(-) diff --git a/src/esm_environment/esm_environment.py b/src/esm_environment/esm_environment.py index b7a4563d5..ae1e61557 100644 --- a/src/esm_environment/esm_environment.py +++ b/src/esm_environment/esm_environment.py @@ -1,5 +1,5 @@ """ -ESM-Tools Batch Script Template Module +ESM-Environment Script 
Template Module ====================================== A template-based script generation system for HPC environments using Jinja2. @@ -12,25 +12,13 @@ Examples -------- ->>> from batch_script_template import BatchScriptTemplate +>>> from esm_environment import ScriptTemplate >>> config = { ... "sh_interpreter": "/bin/bash", ... "module_actions": ["load python", "load netcdf"], ... "export_vars": {"PATH": "/usr/local/bin:$PATH"} ... } ->>> batch = BatchScriptTemplate(config) ->>> script = batch.render() - ->>> # Batch job script ->>> config.update({ -... "batch_system": "slurm", -... "job": { -... "name": "test_job", -... "tasks": 4, -... "time": "01:00:00", -... } -... }) ->>> batch = BatchScriptTemplate(config) +>>> batch = ScriptTemplate(config) >>> script = batch.render() See Also @@ -46,13 +34,13 @@ import importlib.resources # import importlib.resources as pkg_resources from pathlib import Path -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional, Set import dpath from jinja2 import Environment, FileSystemLoader -def clean_env_var_name(name: str) -> str: +def _clean_env_export_vars_name(name: str) -> str: """ Remove enclosed suffixes from environment variable names. 
@@ -74,13 +62,13 @@ def clean_env_var_name(name: str) -> str: Examples -------- - >>> clean_env_var_name("PATH[(1)]") + >>> _clean_env_export_vars_name("PATH[(1)]") 'PATH' - >>> clean_env_var_name("PYTHONPATH[(42)]") + >>> _clean_env_export_vars_name("PYTHONPATH[(42)]") 'PYTHONPATH' - >>> clean_env_var_name("NORMAL_VAR") + >>> _clean_env_export_vars_name("NORMAL_VAR") 'NORMAL_VAR' - >>> clean_env_var_name("MY_LIST[(list)]") + >>> _clean_env_export_vars_name("MY_LIST[(list)]") 'MY_LIST' Notes @@ -137,35 +125,35 @@ def _find_keys_with_substring(d: dict[Any, Any], substring: str) -> list[str]: return matches -def finialize_config_with_env_changes( - config: dict, setting: Optional[str] = None -) -> dict: - if setting not in ["runtime", "compiletime"]: - raise ValueError( - f"Can only finialize a configuration for ``runtime`` or ``compiletime``, got {setting=}!" - ) - config = copy.deepcopy(config) - environment_changes_keys = _find_keys_with_substring(config, "environment_changes") - xtime_keys = _find_keys_with_substring(config, f"{setting}_environment_changes") - all_change_keys = set((environment_changes_keys, xtime_keys)) - if all_change_keys: - breakpoint() - return config - - class EnvironmentChangeManager: - _VALID_PHASES: Tuple(str, ...) 
= ( + _VALID_PHASES: Set[str] = { "runtime", "compiletime", - ) - """tuple : The phases which the EnvironmentChangeManager can be usefully applied to""" + } + """set : The phases which the EnvironmentChangeManager can be usefully applied to""" def __init__(self, execution_phase: str): - if execution_phase not in self._SUPPORTED_JOB_TIMES: - raise TypeError(f"Please use one of {_VALID_PHASES} for the {__class__}") + if execution_phase not in self._VALID_PHASES: + raise TypeError( + f"Please use one of {self._VALID_PHASES} for the {__class__.__name__}" + ) + self.execution_phase = execution_phase + + def finialize_config_with_env_changes(self, config: dict) -> dict: + config = copy.deepcopy(config) + environment_changes_keys = _find_keys_with_substring( + config, "environment_changes" + ) + xtime_keys = _find_keys_with_substring( + config, f"{self.execution_phase}_environment_changes" + ) + all_change_keys = set((environment_changes_keys, xtime_keys)) + if all_change_keys: + breakpoint() + return config -class BatchScriptTemplate: +class ScriptTemplate: """ A template engine for generating shell and batch job scripts. @@ -181,9 +169,6 @@ class BatchScriptTemplate: Expected keys include: - sh_interpreter : str Shell interpreter to use (default: /bin/bash) - - batch_system : str, optional - Either 'slurm' or 'pbs'. If not provided, generates a simple - shell script without batch headers. - module_actions : list List of module commands to execute - export_vars : dict @@ -193,6 +178,8 @@ class BatchScriptTemplate: template_dir : Path, optional Directory containing custom Jinja2 templates for script generation. If not provided, uses the default templates from the package. + template_name : str, optional + The default template name to use when creating new scripts. 
Attributes ---------- @@ -207,13 +194,12 @@ class BatchScriptTemplate: ----- The template directory should contain the following structure: templates/ - ├── base.sh.j2 # Base shell script template - ├── headers/ - │ ├── slurm.sh.j2 # SLURM header template - │ └── pbs.sh.j2 # PBS header template - ├── module_actions.sh.j2 # Module loading template - ├── exports.sh.j2 # Environment exports template - └── unset.sh.j2 # Unset variables template + ├── base.sh.j2 # Base shell script template + ├── slurm.sh.j2 # Base shell script template for SLURM scripts + ├── pbs.sh.j2 # Base shell script template for PBS scripts + ├── module_actions.sh.j2 # Module loading template + ├── exports.sh.j2 # Environment exports template + └── unset.sh.j2 # Unset variables template Examples -------- @@ -222,42 +208,24 @@ class BatchScriptTemplate: ... "module_actions": ["load intel"], ... "export_vars": {"OMP_NUM_THREADS": "4"} ... } - >>> batch = BatchScriptTemplate(config) + >>> batch = ScriptTemplate(config) >>> print(batch.render()) #!/bin/bash module load intel export OMP_NUM_THREADS=4 - SLURM job script: - >>> config.update({ - ... "batch_system": "slurm", - ... "job": { - ... "name": "test_job", - ... "tasks": 4, - ... "time": "01:00:00", - ... "partition": "compute" - ... } - ... 
}) - >>> batch = BatchScriptTemplate(config) - >>> print(batch.render()) - #!/bin/bash -l - #SBATCH --job-name=test_job - #SBATCH --ntasks=4 - #SBATCH --time=01:00:00 - #SBATCH --partition=compute - - module load intel - export OMP_NUM_THREADS=4 - See Also -------- jinja2.Environment : The template engine used by this class dpath : Library used for deep dictionary updates """ + DEFAULT_TEMPLATE = "base.sh.j2" + """str: The name of the default template file to use when rendering a script""" + def __init__( - self, config: Dict[str, Any], template_dir: Optional[Path] = None + self, config: Dict[str, Any], template_dir: Optional[Path] = None, template_name: Optional[str] = None ) -> None: """ Initialize the BatchScriptTemplate with configuration and optional templates. @@ -268,20 +236,10 @@ def __init__( Configuration dictionary for script setup. template_dir : Path, optional Path to custom template directory. If None, uses package defaults. + template_name : str, optional + The name of the template to use within the template_dir. """ self.config = config - self.batch_system = config.get("batch_system", "").lower() or None - - if self.batch_system and self.batch_system not in ["slurm", "pbs"]: - raise ValueError( - "batch_system, if specified, must be either 'slurm' or 'pbs'" - ) - - # FIXME(PG): This is a nice idea, but it doesn't fit here... 
- # if self.batch_system and "job" not in config: - # raise ValueError( - # "job configuration is required when batch_system is specified" - # ) if template_dir is None: # Use the package's default templates @@ -292,12 +250,14 @@ def __init__( importlib.resources.files("esm_environment") ) as template_path: template_dir = template_path / "templates" + if template_name is None: + self.template_name = self.DEFAULT_TEMPLATE self.env = Environment( loader=FileSystemLoader(template_dir), trim_blocks=True, lstrip_blocks=True ) # Add custom jinja2 filters: - self.env.filters["clean_env_var"] = clean_env_var_name + self.env.filters["clean_env_var"] = _clean_env_export_vars_name # Post inits: clean up and finialize the configuration: self._post_init_add_esm_var() @@ -340,7 +300,7 @@ def _post_init_add_esm_var(self): self.config["export_vars"] = {"ENVIRONMENT_SET_BY_ESMTOOLS": "TRUE"} def render( - self, include_set_e: bool = True, tail_commands: Optional[list] = None + self, include_set_e: bool = True, commands: Optional[list] = None ) -> str: """ Render a complete script, optionally including batch system headers. @@ -364,11 +324,15 @@ def render( ... "module_actions": ["load intel/2020"], ... "export_vars": {"MPI_ROOT": "/opt/mpi"} ... } - >>> script_template = BatchScriptTemplate(config) + >>> script_template = ScriptTemplate(config) >>> script = script_template.render() Notes ----- + All keys in the configuration are passed into Jinja to be used for + rendering the script. Unused keys in the configuration will not + raise errors. + The rendered script will include sections for: 1. Shell interpreter specification 2. 
Batch system headers (if batch_system is specified) @@ -383,42 +347,18 @@ def render( jinja2.TemplateError If there are syntax errors in the templates """ - if tail_commands is None: - tail_commands = [] - script_parts = [] - - # Add batch system header if specified - if self.batch_system: - header_template = self.env.get_template( - f"headers/{self.batch_system}.sh.j2" - ) - header = header_template.render(job=self.config.get("job", {})) - script_parts.append(header) + if commands is None: + commands = [] - # Add environment setup - env_template = self.env.get_template("base.sh.j2") - environment = env_template.render( - sh_interpreter=self.config.get("sh_interpreter", "/bin/bash"), + script_template = self.env.get_template(self.template_name) + script = script_template.render( include_set_e=include_set_e, - module_actions=self.config.get("module_actions", []), - export_vars=self.config.get("export_vars", {}), - unset_vars=self.config.get("unset_vars", []), - general_actions=self.config.get("general_actions", []), - spack_actions=self.config.get("spack_actions", []), + **self.config, ) + script_parts = script.split("\n") - # Since batch system was added at the very beginning, we need to make sure - # that the shell interpreter line is still at the top: - environment = environment.split("\n") - shell_interpreter = environment.pop(0) # Remove shell interpreter line - for env_statement in environment: - script_parts.append(env_statement) - - # Add shell interpreter line at the beginning - script_parts.insert(0, shell_interpreter) - - for tail_command in tail_commands: - script_parts.append(tail_command) + for command in commands: + script_parts.append(command) return "\n".join(script_parts) @@ -437,7 +377,7 @@ def update_config(self, new_config: Dict[str, Any], separator: str = "/") -> Non Examples -------- Simple update: - >>> batch = BatchScriptTemplate({"sh_interpreter": "/bin/bash"}) + >>> batch = ScriptTemplate({"sh_interpreter": "/bin/bash"}) >>> 
batch.update_config({"module_actions": ["load intel"]}) Nested update: @@ -448,7 +388,7 @@ def update_config(self, new_config: Dict[str, Any], separator: str = "/") -> Non ... "nested": {"key": "old_value"} ... } ... } - >>> batch = BatchScriptTemplate(initial_config) + >>> batch = ScriptTemplate(initial_config) >>> print("Initial config:", batch.config) Initial config: { 'export_vars': { @@ -479,20 +419,14 @@ def update_config(self, new_config: Dict[str, Any], separator: str = "/") -> Non -------- dpath : Library used for the deep dictionary updates """ - dpath.util.merge(self.config, new_config, separator=separator) - # Update batch_system if it was changed - if "batch_system" in new_config: - self.batch_system = self.config.get("batch_system", "").lower() or None - if self.batch_system and self.batch_system not in ["slurm", "pbs"]: - raise ValueError( - "batch_system, if specified, must be either 'slurm' or 'pbs'" - ) + dpath.merge(self.config, new_config, separator=separator) + # NOTE(PG): This method is mostly for backwards-compatibility... def write_dummy_script(self, include_set_e: bool = True) -> None: """ Writes a dummy script containing only the header information, module commands, and export variables. The actual compile/configure commands - are added later. + are not included and can be added later by re-opening the file.
Parameters ---------- @@ -503,3 +437,42 @@ def write_dummy_script(self, include_set_e: bool = True) -> None: """ with open("dummy_script.sh", "w") as script_file: script_file.write(self.render(include_set_e=include_set_e)) + + +class BatchScriptTemplate(ScriptTemplate): + DEFAULT_TEMPLATE = "batch_system.sh.j2" + """str: The name of the default template file to use when rendering a script""" + + def __init__(self, config): + super().__init__(config) + self.batch_system = config.get("batch_system", "").lower() or None + + def update_config(self, new_config: Dict [str, Any], separator: str ="/"): + super().update_config(new_config, separator) + # Update batch_system if it was changed + if "batch_system" in new_config: + self.batch_system = self.config.get("batch_system", "").lower() or None + if self.batch_system and self.batch_system not in ["slurm", "pbs"]: + raise ValueError( + "batch_system, if specified, must be either 'slurm' or 'pbs'" + ) + +class SLURMBatchScriptTemplate(BatchScriptTemplate): + DEFAULT_TEMPLATE = "slurm.sh.j2" + """str: The name of the default template file to use when rendering a script""" + + def __init__(self, config): + super().__init__(config) + + if self.batch_system and self.batch_system != "slurm": + raise ValueError("batch_system, if specified, must be 'slurm'") + +class PBSBatchScriptTemplate(BatchScriptTemplate) + DEFAULT_TEMPLATE = "pbs.sh.j2" + """str: The name of the default template file to use when rendering a script""" + + def __init__(self, config): + super().__init__(config) + + if self.batch_system and self.batch_system != "pbs": + raise ValueError("batch_system, if specified, must be 'pbs'") From 415866f9663ccb88abfb3ef1f692e3488949ea90 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Wed, 19 Feb 2025 09:46:45 +0100 Subject: [PATCH 8/8] wip: more cleaning --- src/esm_environment/esm_environment.py | 35 ++++++++++++++++++++------ 1 file changed, 28 insertions(+), 7 deletions(-) diff --git 
a/src/esm_environment/esm_environment.py b/src/esm_environment/esm_environment.py index ae1e61557..0b2abca08 100644 --- a/src/esm_environment/esm_environment.py +++ b/src/esm_environment/esm_environment.py @@ -225,7 +225,10 @@ class ScriptTemplate: """str: The name of the default template file to use when rendering a script""" def __init__( - self, config: Dict[str, Any], template_dir: Optional[Path] = None, template_name: Optional[str] = None + self, + config: Dict[str, Any], + template_dir: Optional[Path] = None, + template_name: Optional[str] = None, ) -> None: """ Initialize the BatchScriptTemplate with configuration and optional templates. @@ -264,13 +267,25 @@ def __init__( @classmethod def from_complete_config( - cls, config: dict, template_dir: Optional[Path] = None - ) -> "BatchScriptTemplate": + cls, + config: dict, + template_dir: Optional[Path] = None, + template_name: Optional[str] = None, + ) -> "ScriptTemplate": """Initializes a BatchScriptTemplate from a full simulation config This extracts only the computer key of a complete simulation config and uses it to create the BatchScriptTemplate. Mirrors old behaviour. + Parameters + ---------- + config : Dict[str, Any] + The complete configuration dictionary. + template_dir : Path, optional + Path to custom template directory. If None, uses package defaults. + template_name : str, optional + The name of the template to use within the template_dir. + Raises ------ KeyError : @@ -278,7 +293,11 @@ def from_complete_config( constructor won't work! """ if "computer" in config: - return cls(config["computer"], template_dir=template_dir) + return cls( + config["computer"], + template_dir=template_dir, + template_name=template_name, + ) raise KeyError( "Must give a complete simulation config containing a ``computer`` key!" 
) @@ -300,7 +319,7 @@ def _post_init_add_esm_var(self): self.config["export_vars"] = {"ENVIRONMENT_SET_BY_ESMTOOLS": "TRUE"} def render( - self, include_set_e: bool = True, commands: Optional[list] = None + self, include_set_e: bool = True, commands: Optional[list] = None ) -> str: """ Render a complete script, optionally including batch system headers. @@ -447,7 +466,7 @@ def __init__(self, config): super().__init__(config) self.batch_system = config.get("batch_system", "").lower() or None - def update_config(self, new_config: Dict [str, Any], separator: str ="/"): + def update_config(self, new_config: Dict[str, Any], separator: str = "/"): super().update_config(new_config, separator) # Update batch_system if it was changed if "batch_system" in new_config: @@ -457,6 +476,7 @@ def update_config(self, new_config: Dict [str, Any], separator: str ="/"): "batch_system, if specified, must be either 'slurm' or 'pbs'" ) + class SLURMBatchScriptTemplate(BatchScriptTemplate): DEFAULT_TEMPLATE = "slurm.sh.j2" """str: The name of the default template file to use when rendering a script""" @@ -467,7 +487,8 @@ def __init__(self, config): if self.batch_system and self.batch_system != "slurm": raise ValueError("batch_system, if specified, must be 'slurm'") -class PBSBatchScriptTemplate(BatchScriptTemplate) + +class PBSBatchScriptTemplate(BatchScriptTemplate): DEFAULT_TEMPLATE = "pbs.sh.j2" """str: The name of the default template file to use when rendering a script"""