Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions cfmm2tar/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ def download_studies(
keep_sorted_dicom: bool = False,
skip_derived: bool = False,
additional_tags: dict[str, str] | None = None,
use_gzip: bool = False,
) -> str:
"""
Download DICOM studies from the server and create tar archives.
Expand Down Expand Up @@ -265,6 +266,7 @@ def download_studies(
Dict maps DICOM tag (hex string) to field name.
Example: {"00100030": "PatientBirthDate", "00100040": "PatientSex"}
(default: None)
use_gzip: Create gzip-compressed tar files (.tar.gz instead of .tar) (default: False)

Returns:
Path to the output directory containing the downloaded tar files
Expand Down Expand Up @@ -315,6 +317,14 @@ def download_studies(
... "00100040": "PatientSex"
... }
... )

Download with gzip compression:
>>> download_studies(
... output_dir="/path/to/output",
... study_description="Khan^NeuroAnalytics",
... study_date="20240101",
... use_gzip=True
... )
"""
# Get credentials
username, password = _get_credentials(username, password, credentials_file)
Expand Down Expand Up @@ -352,6 +362,7 @@ def download_studies(
force_refresh_trust_store=force_refresh_trust_store,
skip_derived=skip_derived,
additional_tags=additional_tags,
use_gzip=use_gzip,
)
else:
# Single UID or wildcard
Expand All @@ -372,6 +383,7 @@ def download_studies(
force_refresh_trust_store=force_refresh_trust_store,
skip_derived=skip_derived,
additional_tags=additional_tags,
use_gzip=use_gzip,
)

# Clean up temp directory if empty
Expand All @@ -397,6 +409,7 @@ def download_studies_from_metadata(
keep_sorted_dicom: bool = False,
skip_derived: bool = False,
additional_tags: dict[str, str] | None = None,
use_gzip: bool = False,
) -> str:
"""
Download DICOM studies using UIDs from metadata.
Expand Down Expand Up @@ -479,6 +492,13 @@ def download_studies_from_metadata(
... metadata="study_metadata.tsv",
... additional_tags={"00100030": "PatientBirthDate"}
... )

Download with gzip compression:
>>> download_studies_from_metadata(
... output_dir="/path/to/output",
... metadata="study_metadata.tsv",
... use_gzip=True
... )
"""
# Get credentials
username, password = _get_credentials(username, password, credentials_file)
Expand Down Expand Up @@ -570,6 +590,7 @@ def download_studies_from_metadata(
force_refresh_trust_store=force_refresh_trust_store,
skip_derived=skip_derived,
additional_tags=additional_tags,
use_gzip=use_gzip,
)

# Clean up temp directory if empty
Expand Down
8 changes: 8 additions & 0 deletions cfmm2tar/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,12 @@ def main():
action="store_true",
help="Skip DICOM files with ImageType containing DERIVED (e.g., reformats, derived images)",
)
parser.add_argument(
"--gzip",
dest="use_gzip",
action="store_true",
help="Create gzip-compressed tar files (.tar.gz instead of .tar)",
)
parser.add_argument(
"--metadata-tags",
dest="metadata_tags",
Expand Down Expand Up @@ -378,6 +384,7 @@ def main():
skip_derived=args.skip_derived,
additional_tags=additional_tags,
tls_cipher=args.tls_cipher,
use_gzip=args.use_gzip,
)
else:
# Normal mode - use search criteria (no specific UIDs provided)
Expand All @@ -399,6 +406,7 @@ def main():
skip_derived=args.skip_derived,
additional_tags=additional_tags,
tls_cipher=args.tls_cipher,
use_gzip=args.use_gzip,
)

# Clean up temp directory if empty
Expand Down
33 changes: 18 additions & 15 deletions cfmm2tar/dicom_sorter.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def sort(self):

return sorted_dirs

def tar(self, depth, tar_filename_sep="_"):
def tar(self, depth, tar_filename_sep="_", use_gzip=False):
"""
extract, apply sort rule, unwrap non-imaging dicom files, and create tar files (imaging -> *.tar, non-imaging -> *.attached.tar; with use_gzip=True: *.tar.gz and *.attached.tar.gz)

Expand All @@ -272,6 +272,7 @@ def tar(self, depth, tar_filename_sep="_"):
given depth = 4,
tar filename is: project_study_date_patient_name_study_id.tar
tar_filename_sep: separator of the tar file name elements
use_gzip: if True, create gzip-compressed tar files (.tar.gz)

output:
tar_full_filename_list: list of resulted tar filenames
Expand Down Expand Up @@ -320,12 +321,14 @@ def tar(self, depth, tar_filename_sep="_"):
# dir_split: ['PI','Project','19700101','1970_01_01_T2','1.9AC66A0D','0003','1970_01_01_T2.MR.PI_project.0003.0194.19700101.D6C44EC8.dcm']
dir_split = relative_path_new_filename.split(os.sep)

tar_filename = tar_filename_sep.join(dir_split[:depth]) + ".tar"
tar_ext = ".tar.gz" if use_gzip else ".tar"
tar_filename = tar_filename_sep.join(dir_split[:depth]) + tar_ext
tar_full_filename = os.path.join(self.output_dir, tar_filename)
tar_full_filename_dict[tar_full_filename].append(item)

tar_mode = "w:gz" if use_gzip else "w"
for tar_full_filename, items in tar_full_filename_dict.items():
with tarfile.open(tar_full_filename, "w") as tar:
with tarfile.open(tar_full_filename, tar_mode) as tar:
for item in items:
original_full_filename = item[0]
relative_path_new_filename = item[1]
Expand All @@ -334,7 +337,8 @@ def tar(self, depth, tar_filename_sep="_"):
tar.add(original_full_filename, arcname=arcname)

# tar non-imaging:
attached_tar_full_filenames = []
# Collect all unwrapped dirs for each attached tar file
attached_tar_dict = defaultdict(list)
for item in before_after_sort_rule_list:
original_full_filename = item[0]
relative_path_new_filename = item[1]
Expand All @@ -343,20 +347,19 @@ def tar(self, depth, tar_filename_sep="_"):

if unwraped_dir:
dir_split = relative_path_new_filename.split(os.sep)
attached_tar_filename = tar_filename_sep.join(dir_split[:depth]) + ".attached.tar"
attached_tar_ext = ".attached.tar.gz" if use_gzip else ".attached.tar"
attached_tar_filename = tar_filename_sep.join(dir_split[:depth]) + attached_tar_ext
attached_tar_full_filename = os.path.join(self.output_dir, attached_tar_filename)

tar_arcname = relative_path_new_filename + "_unwraped"
attached_tar_dict[attached_tar_full_filename].append((unwraped_dir, tar_arcname))

if attached_tar_full_filename not in attached_tar_full_filenames:
with tarfile.open(attached_tar_full_filename, "w") as tar:
tar.add(unwraped_dir, arcname=tar_arcname)

attached_tar_full_filenames.append(attached_tar_full_filename)

else:
with tarfile.open(attached_tar_full_filename, "a") as tar:
tar.add(unwraped_dir, arcname=tar_arcname)
# Write all attached tar files
attached_tar_full_filenames = []
for attached_tar_full_filename, items in attached_tar_dict.items():
with tarfile.open(attached_tar_full_filename, tar_mode) as tar:
for unwraped_dir, tar_arcname in items:
tar.add(unwraped_dir, arcname=tar_arcname)
attached_tar_full_filenames.append(attached_tar_full_filename)

return list(tar_full_filename_dict.keys()) + attached_tar_full_filenames

Expand Down
9 changes: 7 additions & 2 deletions cfmm2tar/retrieve_cfmm_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def main(
skip_derived=False,
additional_tags=None,
tls_cipher="TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
use_gzip=False,
):
"""
main workflow: for each study: query,retrieve,tar
Expand Down Expand Up @@ -194,7 +195,7 @@ def main(
# according to CFMM's rule, folder depth is 5:
# pi/project/study_date/patient/studyID_and_hash_studyInstanceUID
# a list with one element; retrieved_dicom_dir contains one study
tar_full_filenames = d.tar(5)
tar_full_filenames = d.tar(5, use_gzip=use_gzip)

# if there are no dicom files in the retrieved folder, tar_full_filenames is None
if not tar_full_filenames:
Expand All @@ -205,7 +206,11 @@ def main(
logger.info(f"tar file created: {tar_full_filename}")

# .uid file
uid_full_filename = tar_full_filename[:-3] + "uid"
# Strip .tar or .tar.gz extension to add .uid
if tar_full_filename.endswith(".tar.gz"):
uid_full_filename = tar_full_filename[:-7] + ".uid"
else:
uid_full_filename = tar_full_filename[:-4] + ".uid"
with open(uid_full_filename, "w") as f:
f.write(StudyInstanceUID + "\n")

Expand Down
Loading