Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions src/esm_runscripts/filedicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1207,17 +1207,18 @@ def from_config(cls, config: dict):
sim_files[file_key] = SimulationFile.from_config(
config, f"{config_address}.{file_key}"
)
sim_files[file_key]["component"] = component
return sim_files

def _gather_file_movements(self) -> None:
"""Puts the methods for each file movement into the dictionary as callable values behind the `_filesystem_op` key"""
for sim_file_id, sim_file_obj in self.items():
movement_type = sim_file_obj.get("movement_type", "cp")
if movement_type == "mv":
movement_type = sim_file_obj.get("movement_type", "copy")
if movement_type == "move":
self[sim_file_id]["_filesystem_op"] = getattr(sim_file_obj, "mv")
elif movement_type == "cp":
elif movement_type == "copy":
self[sim_file_id]["_filesystem_op"] = getattr(sim_file_obj, "cp")
elif movement_type == "ln":
elif movement_type == "link":
self[sim_file_id]["_filesystem_op"] = getattr(sim_file_obj, "ln")
else:
raise ValueError(
Expand All @@ -1231,12 +1232,18 @@ def execute_filesystem_operation(
for sim_file_id, sim_file_obj in self.items():
logger.info(f"Processing {sim_file_id}")
if config["general"]["jobtype"] == "prepcompute":
src, dest = "pool", "work"
src, dest = "computer", "work"
elif config["general"]["jobtype"] == "tidy":
src, dest = "work", "exp_tree"
else:
raise ValueError(f"Incorrect jobtype specified for {sim_file_obj}")
sim_file_obj["_filesystem_op"](src, dest)
config[sim_file_obj["component"]]["files"][sim_file_id]["src"] = (
sim_file_obj.paths[src]
)
config[sim_file_obj["component"]]["files"][sim_file_id]["dest"] = (
sim_file_obj.paths[dest]
)
Comment on lines +1245 to +1248
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not 100% sure about this, it's a design thing...

what do you think of the following. At this point:

config[sim_file_obj["component"]]["files"][sim_file_id]

we have the SimulationFile. Should it know about the current phase of movement it is in, and potentially, if it needs an intermediate location?

I would say no; that job belongs somewhere else. Therefore, that info also belongs somewhere else... here you are injecting extra info into the SimulationFile's dictionary, right?

Maybe I am overthinking it.

return config


Expand All @@ -1258,3 +1265,21 @@ def resolve_file_movements(config: ConfigSetup) -> ConfigSetup:
sim_file_collection = SimulationFileCollection.from_config(config)
config = sim_file_collection.execute_filesystem_operation(config)
return config


def log_used_files(config: ConfigSetup) -> ConfigSetup:
    """
    Logs the files moved on this current phase.

    Parameters
    ----------
    config : ConfigSetup
        The complete simulation configuration.

    Returns
    -------
    config : ConfigSetup
        The complete simulation configuration, potentially modified.
    """
    # NOTE(review): currently a no-op stub -- no file logging is performed
    # yet; ``config`` is returned unchanged.
    return config
56 changes: 56 additions & 0 deletions tests/test_esm_runscripts/test_filedicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,3 +1115,59 @@ def test_globbing_ln(fs):

for nf in expected_new_paths:
assert os.path.exists(nf)


def test_file_log(fs):
    """
    Checks that the file log is produced correctly.

    Builds a minimal simulation configuration for a single ``echam`` input
    file, runs the filesystem operation for the ``prepcompute`` phase on a
    fake filesystem (``fs`` is the pyfakefs fixture), calls
    ``log_used_files``, and compares the written log file against the
    expected YAML content.
    """
    # Minimal config: one echam input file moved from the computer pool
    # into the work directory during prepcompute.
    dummy_config = """
general:
    expid: expid
    base_dir: /some/dummy/location/
    thisrun_work_dir: "/work/ollie/pgierz/some_exp/run_20010101-20011231/work"
    exp_dir: "/work/ollie/pgierz/some_exp"
    thisrun_dir: "/work/ollie/pgierz/some_exp/run_20010101-20011231"
    all_model_filetypes: [analysis, bin, config, forcing, input, couple, log, mon, outdata, restart, viz, ignore]
    jobtype: "prepcompute"
    valid_model_names: ["echam"]
computer:
    pool_dir: "/work/ollie/pool"
echam:
    experiment_input_dir: /work/ollie/pgierz/some_exp/input/echam
    thisrun_input_dir: /work/ollie/pgierz/some_exp/run_20010101-20011231/input/echam
    files:
        human_readable_tag_001:
            kind: input
            allowed_to_be_missing: True
            name_in_computer: foo
            path_in_computer: /work/data/pool
            name_in_work: foo
            path_in_work: .
            movement_type: move
"""

    # Expected content of the file-movement log written by log_used_files.
    # The checksum is the MD5 of an empty file, matching the empty "foo"
    # fixture created below.
    check_log_file = """
echam:
    human_readable_tag_001:
        checksum: d41d8cd98f00b204e9800998ecf8427e
        intermediate: null
        source: /work/data/pool/foo
        target: /work/ollie/pgierz/some_exp/run_20010101-20011231/work/foo
        kind: input
"""
    date = esm_calendar.Date("2000-01-01T00:00:00")
    config = yaml.safe_load(dummy_config)
    config["general"]["current_date"] = date

    # Fake-filesystem fixtures: the source file and the work directory.
    fs.create_dir("/work/data/pool")
    fs.create_file("/work/data/pool/foo")
    fs.create_dir("/work/ollie/pgierz/some_exp/run_20010101-20011231/work")

    sim_files = esm_runscripts.filedicts.SimulationFileCollection.from_config(
        config
    )
    config = sim_files.execute_filesystem_operation(config)

    esm_runscripts.filedicts.log_used_files(config)

    # Use a context manager so the handle is closed deterministically
    # (open(...).read() leaks the file object until GC).
    log_path = (
        "/work/ollie/pgierz/some_exp/run_20010101-20011231/log/"
        "expid_filelist_20010101-20011231.yaml"
    )
    with open(log_path, "r") as log_fh:
        log_file = log_fh.read()

    assert log_file == check_log_file