Skip to content

Commit

Permalink
1. Added replace_key_with_dataset_id function to delete deaccession…
Browse files Browse the repository at this point in the history
… dataset from the failed uris metadata dict, avoiding confusion. (#11)
  • Loading branch information
kenlhlui authored Feb 7, 2025
1 parent 1cfbd74 commit 819dd9e
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
21 changes: 21 additions & 0 deletions dvmeta/func.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,24 @@ def replace_key_with_dataset_id(dictionary: dict) -> dict:
# Keep the original key if 'id' is missing
new_dict[old_key] = value
return new_dict


def rm_dd_from_failed_uris(failed_uris: dict, pid_dict_dd: dict) -> dict:
    """Remove the deaccessioned datasets from the failed_uris dictionary.

    A key of ``failed_uris`` is dropped when it contains, as a substring, any
    ``datasetPersistentId`` found in the values of ``pid_dict_dd``. The
    ``failed_uris`` dictionary is modified in place and the same object is
    returned.

    Args:
        failed_uris (dict): Dictionary containing the failed URIs (keys are expected to be strings)
        pid_dict_dd (dict): Dictionary containing the deaccessioned datasets metadata

    Returns:
        dict: Dictionary containing the failed URIs without the deaccessioned datasets
    """
    # Collect the usable persistent IDs. Entries that are not dicts, or that
    # have no 'datasetPersistentId', are skipped instead of raising KeyError.
    # Empty IDs are skipped as well: '' is a substring of every key and would
    # otherwise remove every entry from failed_uris.
    dd_pids = set()
    for entry in pid_dict_dd.values():
        pid = entry.get('datasetPersistentId') if isinstance(entry, dict) else None
        if isinstance(pid, str) and pid:
            dd_pids.add(pid)

    # Nothing to match against: leave failed_uris untouched
    if not dd_pids:
        return failed_uris

    # Snapshot the matching keys first; a dict must not change size while it
    # is being iterated.
    keys_to_remove = [uri for uri in failed_uris if any(pid in uri for pid in dd_pids)]
    for uri in keys_to_remove:
        del failed_uris[uri]

    return failed_uris
3 changes: 3 additions & 0 deletions dvmeta/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,9 @@ async def main_crawler():
# Add the path_info to the metadata
meta_dict, pid_dict_dd = func.add_path_info(meta_dict, ds_dict)

# Remove the deaccessioned/draft datasets (those listed in pid_dict_dd) from failed_metadata_uris
failed_metadata_uris = func.rm_dd_from_failed_uris(failed_metadata_uris, pid_dict_dd)

# Export the updated pid_dict_dd (Which contains deaccessioned/draft datasets) to a JSON file
pid_dict_json, pid_dict_checksum = utils.orjson_export(pid_dict_dd, 'pid_dict_dd')
json_file_checksum_dict.append(
Expand Down

0 comments on commit 819dd9e

Please sign in to comment.