-
Notifications
You must be signed in to change notification settings - Fork 29
Current expt time for running jobs in payu status #702
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
479d029
2896232
75761ba
6d091b8
df1f039
f3c1025
669493e
b50a431
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,8 @@ | |
|
|
||
| import os | ||
| import shutil | ||
| import json | ||
| import warnings | ||
|
|
||
| from payu.models.model import Model | ||
|
|
||
|
|
@@ -94,3 +96,25 @@ def get_restart_datetime(self, restart_path): | |
|
|
||
| return self.get_restart_datetime_using_submodel(restart_path, | ||
| model_types) | ||
|
|
||
| def get_cur_expt_time(self): | ||
| """Get the current experiment time from file work/atmosphere/log/matmxx.pe00000.log.""" | ||
| try: | ||
| log_path = os.path.join(self.expt.work_path, 'atmosphere', 'log', | ||
| 'matmxx.pe00000.log') | ||
|
|
||
| # Read out the latest `cur_exp-datetime` from the log file | ||
| if os.path.exists(log_path): | ||
| with open(log_path, 'r') as f: | ||
| for line in reversed(f.readlines()): | ||
| if 'cur_exp-datetime' in line: | ||
| cur_expt_time = json.loads(line)['cur_exp-datetime'] | ||
| return cur_expt_time | ||
|
|
||
| warnings.warn(f"Log file {log_path} does not exist or does not contain current model time.") | ||
| return None | ||
|
|
||
| except KeyError as e: | ||
|
||
| warnings.warn('Error getting current experiment time: {}'.format(e)) | ||
| return None | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -422,6 +422,27 @@ def get_components(self): | |
| "Access-OM3 comprises a data runoff model, but the runoff model in nuopc.runconfig is set " | ||
| f"to {self.components['rof']}." | ||
| ) | ||
|
|
||
| def get_cur_expt_time(self): | ||
| """Get the current experiment time from file work/log/med.log.""" | ||
| try: | ||
| log_path = os.path.join(self.expt.work_path, 'log', 'med.log') | ||
|
|
||
| # Read out the latest `cur_exp-datetime` from the log file | ||
| if os.path.exists(log_path): | ||
| with open(log_path, 'r') as f: | ||
| for line in reversed(f.readlines()): | ||
| if line.startswith(" memory_write: model date"): | ||
|
||
| cur_expt_time = line.split()[4] | ||
| return cur_expt_time | ||
|
|
||
| warn(f"Log file {log_path} does not exist or does not contain current model time.") | ||
| return None | ||
|
|
||
| except KeyError as e: | ||
|
||
| warn('Error getting current experiment time: {}'.format(e)) | ||
| return None | ||
|
|
||
|
|
||
|
|
||
| class Runconfig: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -449,3 +449,31 @@ def get_restart_datetime_using_submodel(self, restart_path, model_types): | |
| f'{self.model_type} date-based restart pruning requires one of ' | ||
| 'these sub-models to determine restart dates.' | ||
| ) | ||
|
|
||
| def get_cur_expt_time(self): | ||
| """For model not implemented experiment time calculate/read-out, | ||
| leaves a warning and returns None.""" | ||
| print("Getting current experiment time is not yet implemented.") | ||
|
||
| return None | ||
|
|
||
| def get_cur_expt_time_using_submodel(self, model_types): | ||
| """ | ||
| Use a specified submodel's get_cur_expt_time method | ||
|
|
||
| Parameters | ||
| ---------- | ||
| model_types: List of submodels in order of priority. Use first | ||
| submodel in model_types that is present in the experiment. | ||
|
|
||
| Returns: | ||
| -------- | ||
| Current experiment time in string (e.g., 1900-01-01T01:00:00) | ||
|
||
| """ | ||
| for model_type in model_types: | ||
| for model in self.expt.models: | ||
| if model.model_type == model_type and hasattr(model, 'get_cur_expt_time'): | ||
blimlim marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| cur_expt_time = model.get_cur_expt_time() | ||
| if cur_expt_time is not None: | ||
| return cur_expt_time | ||
|
|
||
| return None | ||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |||
| from payu.fsops import read_config | ||||
| from payu.metadata import MetadataWarning, Metadata | ||||
| from payu.laboratory import Laboratory | ||||
| from payu.experiment import Experiment | ||||
| import payu.subcommands.args as args | ||||
| from payu.status import ( | ||||
| build_job_info, | ||||
|
|
@@ -31,21 +32,19 @@ def runcmd(lab_path, config_path, json_output, | |||
| # Suppress output to os.devnull | ||||
| with redirect_stdout(open(os.devnull, 'w')): | ||||
| # Determine archive path | ||||
| lab = Laboratory(lab_path) | ||||
| lab = Laboratory(config_path=config_path, lab_path=lab_path) | ||||
| warnings.filterwarnings("error", category=MetadataWarning) | ||||
| try: | ||||
| metadata = Metadata(Path(lab.archive_path), | ||||
| config_path=config_path) | ||||
| metadata.setup() | ||||
| expt = Experiment(lab, config_path=config_path) | ||||
| expt.init_models() | ||||
|
||||
| expt.init_models() |
No need to run this as init_models() is already run as part of Experiment initialisation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh I see. Thanks for pointing this out :)
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if we should skip the get_model_cur_expt_time() step if the stage is payu setup:
e.g.
$ payu status
/g/data/tm70/as2285/payu-dev/payu-dev/lib64/python3.11/site-packages/payu/models/cesm_cmeps.py:439: UserWarning: Log file /scratch/tm70/as2285/access-om3/work/1112-update-25km-topo/log/med.log does not exist or does not contain current model time.
========================================
Run: 0
Job ID: 163373011.gadi-pbs
Stage: setup
Job File: /scratch/tm70/as2285/access-om3/archive/1112-update-25km-topo/payu_jobs/0/run/163373011.gadi-pbs.json
shows a warning when it's the expected behaviour
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or job is in the queue still
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, this could be deferred to a later point when we know the current stage is "model-run"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for picking this up!
Following the suggestion from @jo-basevi , I have changed payu to run get_model_cur_expt_time() only during model-run stage in the latest commit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the current experiment time also be written to the json output, or would there not really be any uses for it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree it will be good to have the current expt time here as well. I have added it in the latest commit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If adding current experiment time to the json and the formatted display then it might make sense to add it in build_job_info() to reduce duplication and use status == model-run
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While testing this out I just noticed that payu status --json also doesn't print out the model finish time after the run has completed:
========================================
Run: 0
Job ID: 163732205.gadi-pbs
Run ID: cd891450d95f0b8454f880b80bbb9fed40c034b4
Stage: archive
Total queue time: 0h 1m 27s
Model Finish Time: 0001-01-06T00:00:00
Exit Status: 0 (Success)
Model Exit Code: 0 (Success)
Output Log: /home/565/sw6175/esm1.6/misc/gettime-test/pre-industrial.o163732205
Error Log: /home/565/sw6175/esm1.6/misc/gettime-test/pre-industrial.e163732205
Job File: /scratch/tm70/sw6175/access-esm/archive/gettime-test-dev-preindustrial+concentrations-d33399d8/payu_jobs/0/run/163732205.gadi-pbs.json
========================================
(payu-env) [sw6175@gadi-login-01 gettime-test]$ payu status --json
{
"experiment_uuid": "d33399d8-053d-4eb6-aa5b-2b7e0ff783e5",
"runs": {
"0": {
"run": [
{
"job_id": "163732205.gadi-pbs",
"run_id": "cd891450d95f0b8454f880b80bbb9fed40c034b4",
"stage": "archive",
"exit_status": 0,
"model_exit_status": 0,
"stdout_file": "/home/565/sw6175/esm1.6/misc/gettime-test/pre-industrial.o163732205",
"stderr_file": "/home/565/sw6175/esm1.6/misc/gettime-test/pre-industrial.e163732205",
"job_file": "/scratch/tm70/sw6175/access-esm/archive/gettime-test-dev-preindustrial+concentrations-d33399d8/payu_jobs/0/run/163732205.gadi-pbs.json",
"start_time": "2026-03-23T16:23:21.697106"
}
]
}
}
}
I'm wondering whether it would also work to add the finish time in build_job_info(), though I don't think I properly understand yet how the finish time is set.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
though I don't think I properly understand yet how the finish time is set.
model_finish_time is parsed from restart files by get_model_restart_datetimes() in experiment.py when stage is changed to archive.
whether it would also work to add the finish time in build_job_info()
Sounds good! I have implemented this in the latest commit. Now payu status --json should return something like:
{
"experiment_uuid": "eaxxxx7",
"runs": {
"5": {
"run": [
{
::
"stage": "archive",
::
"start_time": "2026-03-27Txxx",
"model_finish_time": "0012-02-11T00:00:00"
}
]
}
}
}
I also confirm that model_finish_time is not written twice in job file.
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -495,7 +495,8 @@ def update_run_job_file( | |||||||
| extra_info: Optional[dict[str, Any]] = None, | ||||||||
| manifests: Optional[dict[str, Any]] = None, | ||||||||
| model_restart_datetimes: Optional[dict[str, Any]] = None, | ||||||||
| timings: Optional[dict[str, Any]] = None | ||||||||
| timings: Optional[dict[str, Any]] = None, | ||||||||
| cur_expt_time: Optional[str] = None | ||||||||
|
||||||||
| timings: Optional[dict[str, Any]] = None, | |
| cur_expt_time: Optional[str] = None | |
| timings: Optional[dict[str, Any]] = None |
This code can safely be removed now I think
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doneeee.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Apologies @Qian-HuiChen, I think my previous suggestion for reading the start date from the
work/atmosphere/namelists file might not be the best one! It should work perfectly as is, but I'm thinking there might be existing methods which could be used to reduce duplication. Each UM restart directory contains a calendar yaml file
um.res.yaml, which holds a copy of the restart date (see /g/data/vk83/prerelease/configurations/inputs/access-esm1p6/modern/pre-industrial/restart/2026.02.20/atmosphere for an example). I'd forgotten that we'd previously added a method to the
um.py driver, get_restart_datetime, which reads the date from this file and uses it for the date-based restart pruning. I'm wondering if it would make sense to reuse this method to get the start date, as it could be pointed to the copy of
um.res.yaml in the atmosphere/work directory. One issue is that the
um.res.yaml is technically not required to run the model, and it's possible for it to be missing. This would nearly never be the case though, but there would have to be a safety check/warning in case it is missing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for pointing this out! It is better to reuse
get_restart_datetime than to write a new one. I have changed the code to read out the starting date from
work/atmosphere/um.res.yaml.