-
Notifications
You must be signed in to change notification settings - Fork 20
Filelist-log file to yaml and with checksum, to be used with esm_tests for the new filedicts.py #973
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: sprint/filedicts/main
Are you sure you want to change the base?
Filelist-log file to yaml and with checksum, to be used with esm_tests for the new filedicts.py #973
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,7 @@ | |
| import functools | ||
| import glob | ||
| import inspect | ||
| import hashlib | ||
| import os | ||
| import pathlib | ||
| import shutil | ||
|
|
@@ -1207,17 +1208,18 @@ def from_config(cls, config: dict): | |
| sim_files[file_key] = SimulationFile.from_config( | ||
| config, f"{config_address}.{file_key}" | ||
| ) | ||
| sim_files[file_key]["component"] = component | ||
| return sim_files | ||
|
|
||
| def _gather_file_movements(self) -> None: | ||
| """Puts the methods for each file movement into the dictionary as callable values behind the `_filesystem_op` key""" | ||
| for sim_file_id, sim_file_obj in self.items(): | ||
| movement_type = sim_file_obj.get("movement_type", "cp") | ||
| if movement_type == "mv": | ||
| movement_type = sim_file_obj.get("movement_type", "copy") | ||
| if movement_type == "move": | ||
| self[sim_file_id]["_filesystem_op"] = getattr(sim_file_obj, "mv") | ||
| elif movement_type == "cp": | ||
| elif movement_type == "copy": | ||
| self[sim_file_id]["_filesystem_op"] = getattr(sim_file_obj, "cp") | ||
| elif movement_type == "ln": | ||
| elif movement_type == "link": | ||
| self[sim_file_id]["_filesystem_op"] = getattr(sim_file_obj, "ln") | ||
| else: | ||
| raise ValueError( | ||
|
|
@@ -1231,12 +1233,19 @@ def execute_filesystem_operation( | |
| for sim_file_id, sim_file_obj in self.items(): | ||
| logger.info(f"Processing {sim_file_id}") | ||
| if config["general"]["jobtype"] == "prepcompute": | ||
| src, dest = "pool", "work" | ||
| src, dest = "computer", "work" | ||
mandresm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| elif config["general"]["jobtype"] == "tidy": | ||
| src, dest = "work", "exp_tree" | ||
| else: | ||
| raise ValueError(f"Incorrect jobtype specified for {sim_file_obj}") | ||
| sim_file_obj["_filesystem_op"](src, dest) | ||
| config[sim_file_obj["component"]]["files"][sim_file_id]["src"] = ( | ||
| sim_file_obj.paths[src] | ||
| ) | ||
| config[sim_file_obj["component"]]["files"][sim_file_id]["intermediate"] = None | ||
| config[sim_file_obj["component"]]["files"][sim_file_id]["dest"] = ( | ||
| sim_file_obj.paths[dest] | ||
| ) | ||
|
Comment on lines
+1245
to
+1248
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not 100% sure about this; it's a design thing... What do you think of the following? At this point: `config[sim_file_obj["component"]]["files"][sim_file_id]` we have the […] I would say no, that job belongs somewhere else. Therefore that info also belongs somewhere else... Here you are injecting extra info in the […]. Maybe I am overthinking it. |
||
| return config | ||
|
|
||
|
|
||
|
|
@@ -1258,3 +1267,66 @@ def resolve_file_movements(config: ConfigSetup) -> ConfigSetup: | |
| sim_file_collection = SimulationFileCollection.from_config(config) | ||
| config = sim_file_collection.execute_filesystem_operation(config) | ||
| return config | ||
|
|
||
|
|
||
| def log_used_files(config: ConfigSetup) -> ConfigSetup: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks great! The only thing I don't like about this function is that it does two things: 1) gathers all the model files, and then 2) writes them to a location. I need number 1 in a separate place for my unknown files, so it would be nice to break this down into smaller pieces. I don't want to have to care (at this point at least) about the recipe order, or we will get into the situation where we rely on the behaviour of one step of the recipe to even make the next one possible (and yes, I know we cannot avoid that entirely). |
||
| """ | ||
| Logs the files moved on this current phase. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| config : ConfigSetup | ||
| The complete simulation configuration. | ||
|
|
||
| Returns | ||
| ------- | ||
| config : ConfigSetup | ||
| The complete simulation configuration, potentially modified. | ||
| """ | ||
| if config["general"].get("verbose", False): | ||
| logger.info("\n::: Logging used files") | ||
|
|
||
| filetypes = config["general"]["relevant_filetypes"] | ||
|
||
| expid = config["general"]["expid"] | ||
| it_coupled_model_name = config["general"].get("iterative_coupled_model", "") | ||
| datestamp = config["general"]["run_datestamp"] | ||
| thisrun_log_dir = config["general"]["thisrun_log_dir"] | ||
| flist_file = ( | ||
| f"{thisrun_log_dir}/{expid}_{it_coupled_model_name}filelist_{datestamp}.yaml" | ||
| ) | ||
| all_files = {} | ||
|
|
||
| for model in config["general"]["valid_model_names"] + ["general"]: | ||
| for filetype in filetypes: | ||
| model_config = config[model] | ||
| model_files = {} | ||
|
|
||
| for file_key, file_obj in model_config.get("files", {}).items(): | ||
| try: | ||
| checksum = hashlib.md5(open( | ||
| file_obj["dest"], "rb" | ||
| ).read()).hexdigest() | ||
| except FileNotFoundError as err: | ||
| checksum = None | ||
|
|
||
| model_files[file_key] = { | ||
| "source": str(file_obj["src"]), | ||
| "intermediate": file_obj["intermediate"], | ||
| "target": str(file_obj["dest"]), | ||
| "checksum": checksum, | ||
| "kind": filetype, | ||
| } | ||
|
|
||
| if config["general"].get("verbose", False): | ||
| logger.info(f"::: logging file category: {filetype}") | ||
| logger.info(f"- source: {files['src']}") | ||
| logger.info(f"- target: {files['dest']}") | ||
| helpers.print_datetime(config) | ||
|
|
||
| if model_files: | ||
| all_files[model] = model_files | ||
|
|
||
| with open(flist_file, "w") as flist: | ||
| yaml.dump(all_files, flist) | ||
|
|
||
| return config | ||
Uh oh!
There was an error while loading. Please reload this page.