diff --git a/setup.py b/setup.py index da558cd89..0079a105d 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ "PyGithub==1.55", "colorama==0.4.5", "coloredlogs==15.0.1", # NOTE(PG): Should be removed during cleanup for loguru instead + "dpath>=2.0.7", # Maximum version for Python 3.6 support "emoji==1.7.0", "f90nml==1.4.2", "gfw-creator==0.2.2", diff --git a/src/esm_runscripts/observe.py b/src/esm_runscripts/observe.py index 11f9a3438..3740998f6 100644 --- a/src/esm_runscripts/observe.py +++ b/src/esm_runscripts/observe.py @@ -2,8 +2,8 @@ import sys import time +import dpath.util import psutil - from loguru import logger from . import database_actions, helpers, logfiles @@ -143,12 +143,35 @@ def assemble_error_list(config): frequency = int(frequency) except: frequency = 60 + if ( + "set_config_key" in config[model]["check_error"][trigger] + and "set_config_value" in config[model]["check_error"][trigger] + ): + set_config_key = config[model]["check_error"][trigger][ + "set_config_key" + ] + set_config_value = config[model]["check_error"][trigger][ + "set_config_value" + ] + else: + set_config_key = None + set_config_value = None + elif isinstance(config[model]["check_error"][trigger], str): pass else: continue error_list.append( - (trigger, search_file, method, frequency, frequency, message) + ( + trigger, + search_file, + method, + frequency, + frequency, + message, + set_config_key, + set_config_value, + ) ) config["general"]["error_list"] = error_list return config @@ -171,6 +194,8 @@ def check_for_errors(config): next_check, frequency, message, + set_config_key, + set_config_value, ) in error_check_list: warned = 0 if next_check <= time: @@ -181,8 +206,31 @@ def check_for_errors(config): if method == "warn": warned = 1 monitor_file.write("WARNING: " + message + "\n") + if ( + set_config_key is not None + and set_config_value is not None + ): + dpath.util.set( + config, + set_config_key, + set_config_value, + separator=".", + ) + logger.info( + f"Set {set_config_key}={set_config_value} in check_for_errors" + ) break elif method == "kill": + if ( + set_config_key is not None + and set_config_value is not None + ): + dpath.util.set( + config, + set_config_key, + set_config_value, + separator=".", + ) cancel_job = f"scancel {config['general']['jobid']}" monitor_file.write("ERROR: " + message + "\n") monitor_file.write("Will kill the run now..." + "\n") @@ -195,7 +243,16 @@ def check_for_errors(config): next_check += frequency if warned == 0: new_list.append( - (trigger, search_file, method, next_check, frequency, message) + ( + trigger, + search_file, + method, + next_check, + frequency, + message, + set_config_key, + set_config_value, + ) ) config["general"]["error_list"] = new_list return config