Skip to content

Commit 5eedfb7

Browse files
committed
Merge branch 'develop' into feature/check_orientations
2 parents 2895e8c + b464a5d commit 5eedfb7

File tree

6 files changed

+185
-44
lines changed

6 files changed

+185
-44
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2828
### Changed
2929

3030
- Moved `pygraphviz` from requirements to `graphviz` optional dependencies group.
31+
- Automatically tag untagged `subject_id` and `unique_id` as `!!str` when loading data config files.
3132
- Made orientation configurable (was hard-coded as "RPI").
3233

3334
### Fixed

CPAC/nuisance/utils/compcor.py

Lines changed: 78 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -91,18 +91,33 @@ def cosine_filter(
9191
failure_mode="error",
9292
):
9393
"""
94-
`cosine_filter` adapted from Nipype.
94+
Apply cosine filter to the input BOLD image using the discrete cosine transform (DCT) method.
95+
96+
Adapted from nipype implementation. https://github.com/nipy/nipype/blob/d353f0d/nipype/algorithms/confounds.py#L1086-L1107
97+
It removes the low-frequency drift from the voxel time series. The filtered image is saved to disk.
9598
96-
https://github.com/nipy/nipype/blob/d353f0d/nipype/algorithms/confounds.py#L1086-L1107
9799
98100
Parameters
99101
----------
100-
input_image_path : string
101-
Bold image to be filtered.
102+
input_image_path : str
103+
Path to the BOLD image to be filtered.
102104
timestep : float
103-
'Repetition time (TR) of series (in sec) - derived from image header if unspecified'
104-
period_cut : float
105-
Minimum period (in sec) for DCT high-pass filter, nipype default value: 128.
105+
Repetition time (TR) of the series (in seconds). Derived from image header if unspecified.
106+
period_cut : float, optional
107+
Minimum period (in seconds) for the DCT high-pass filter. Default value is 128.
108+
remove_mean : bool, optional
109+
Whether to remove the mean from the voxel time series before filtering. Default is True.
110+
axis : int, optional
111+
The axis along which to apply the filter. Default is -1 (last axis).
112+
failure_mode : {'error', 'ignore'}, optional
113+
Specifies how to handle failure modes. If set to 'error', the function raises an error.
114+
If set to 'ignore', it returns the input data unchanged in case of failure. Default is 'error'.
115+
116+
Returns
117+
-------
118+
cosfiltered_img : str
119+
Path to the filtered BOLD image.
120+
106121
"""
107122
# STATEMENT OF CHANGES:
108123
# This function is derived from sources licensed under the Apache-2.0 terms,
@@ -113,6 +128,7 @@ def cosine_filter(
113128
# * Removed calculation and return of `non_constant_regressors`
114129
# * Modified docstring to reflect local changes
115130
# * Updated style to match C-PAC codebase
131+
# * Updated to use generator and iterate over voxel time series to optimize memory usage.
116132

117133
# ORIGINAL WORK'S ATTRIBUTION NOTICE:
118134
# Copyright (c) 2009-2016, Nipype developers
@@ -132,41 +148,74 @@ def cosine_filter(
132148
# Prior to release 0.12, Nipype was licensed under a BSD license.
133149

134150
# Modifications copyright (C) 2019 - 2024 C-PAC Developers
135-
from nipype.algorithms.confounds import _cosine_drift, _full_rank
151+
try:
136152

137-
input_img = nib.load(input_image_path)
138-
input_data = input_img.get_fdata()
153+
def voxel_generator():
154+
for i in range(datashape[0]):
155+
for j in range(datashape[1]):
156+
for k in range(datashape[2]):
157+
yield input_data[i, j, k, :]
139158

140-
datashape = input_data.shape
141-
timepoints = datashape[axis]
142-
if datashape[0] == 0 and failure_mode != "error":
143-
return input_data, np.array([])
159+
from nipype.algorithms.confounds import _cosine_drift, _full_rank
144160

145-
input_data = input_data.reshape((-1, timepoints))
161+
input_img = nib.load(input_image_path)
162+
input_data = input_img.get_fdata()
163+
datashape = input_data.shape
164+
timepoints = datashape[axis]
165+
if datashape[0] == 0 and failure_mode != "error":
166+
return input_data, np.array([])
146167

147-
frametimes = timestep * np.arange(timepoints)
148-
X = _full_rank(_cosine_drift(period_cut, frametimes))[0]
168+
frametimes = timestep * np.arange(timepoints)
169+
X_full = _full_rank(_cosine_drift(period_cut, frametimes))[0]
149170

150-
betas = np.linalg.lstsq(X, input_data.T)[0]
171+
# Generate X with and without the mean column
172+
X_with_mean = X_full
173+
X_without_mean = X_full[:, :-1] if X_full.shape[1] > 1 else X_full
151174

152-
if not remove_mean:
153-
X = X[:, :-1]
154-
betas = betas[:-1]
175+
# Reshape the input data to bring the time dimension to the last axis if it's not already
176+
if axis != -1:
177+
reshaped_data = np.moveaxis(input_data, axis, -1)
178+
else:
179+
reshaped_data = input_data
180+
181+
reshaped_output_data = np.zeros_like(reshaped_data)
182+
183+
# Choose the appropriate X matrix
184+
X = X_without_mean if remove_mean else X_with_mean
155185

156-
residuals = input_data - X.dot(betas).T
186+
voxel_gen = voxel_generator()
157187

158-
output_data = residuals.reshape(datashape)
188+
for i in range(reshaped_data.shape[0]):
189+
IFLOGGER.info(
190+
f"calculating {i+1} of {reshaped_data.shape[0]} row of voxels"
191+
)
192+
for j in range(reshaped_data.shape[1]):
193+
for k in range(reshaped_data.shape[2]):
194+
voxel_time_series = next(voxel_gen)
195+
betas = np.linalg.lstsq(X, voxel_time_series.T, rcond=None)[0]
196+
197+
residuals = voxel_time_series - X.dot(betas)
198+
reshaped_output_data[i, j, k, :] = residuals
199+
200+
# Move the time dimension back to its original position if it was reshaped
201+
if axis != -1:
202+
output_data = np.moveaxis(reshaped_output_data, -1, axis)
203+
else:
204+
output_data = reshaped_output_data
159205

160-
hdr = input_img.header
161-
output_img = nib.Nifti1Image(output_data, header=hdr, affine=input_img.affine)
206+
hdr = input_img.header
207+
output_img = nib.Nifti1Image(output_data, header=hdr, affine=input_img.affine)
208+
file_name = input_image_path[input_image_path.rindex("/") + 1 :]
162209

163-
file_name = input_image_path[input_image_path.rindex("/") + 1 :]
210+
cosfiltered_img = os.path.join(os.getcwd(), file_name)
164211

165-
cosfiltered_img = os.path.join(os.getcwd(), file_name)
212+
output_img.to_filename(cosfiltered_img)
166213

167-
output_img.to_filename(cosfiltered_img)
214+
return cosfiltered_img
168215

169-
return cosfiltered_img
216+
except Exception as e:
217+
message = f"Error in cosine_filter: {e}"
218+
IFLOGGER.error(message)
170219

171220

172221
def fallback_svd(a, full_matrices=True, compute_uv=True):

CPAC/utils/bids_utils.py

Lines changed: 71 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,13 @@
1414

1515
# You should have received a copy of the GNU Lesser General Public
1616
# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
17+
from base64 import b64decode
18+
from collections.abc import Iterable
1719
import json
1820
import os
1921
import re
2022
import sys
23+
from typing import Any, Callable, Optional
2124
from warnings import warn
2225

2326
from botocore.exceptions import BotoCoreError
@@ -26,6 +29,16 @@
2629
from CPAC.utils.monitoring import UTLOGGER
2730

2831

32+
class SpecifiedBotoCoreError(BotoCoreError):
33+
"""Specified :py:class:`~botocore.exceptions.BotoCoreError`."""
34+
35+
def __init__(self, msg: str, *args, **kwargs) -> None:
36+
"""Initialize BotoCoreError with message."""
37+
msg = msg.format(**kwargs)
38+
Exception.__init__(self, msg)
39+
self.kwargs = kwargs
40+
41+
2942
def bids_decode_fname(file_path, dbg=False, raise_error=True):
3043
f_dict = {}
3144

@@ -842,7 +855,7 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""):
842855
f"Error retrieving {s3_obj.key.replace(prefix, '')}"
843856
f" ({e.message})"
844857
)
845-
raise BotoCoreError(msg) from e
858+
raise SpecifiedBotoCoreError(msg) from e
846859
elif "nii" in str(s3_obj.key):
847860
file_paths.append(
848861
str(s3_obj.key).replace(prefix, "").lstrip("/")
@@ -868,9 +881,15 @@ def collect_bids_files_configs(bids_dir, aws_input_creds=""):
868881
): json.load(open(os.path.join(root, f), "r"))
869882
}
870883
)
871-
except UnicodeDecodeError:
884+
except UnicodeDecodeError as unicode_decode_error:
872885
msg = f"Could not decode {os.path.join(root, f)}"
873-
raise UnicodeDecodeError(msg)
886+
raise UnicodeDecodeError(
887+
unicode_decode_error.encoding,
888+
unicode_decode_error.object,
889+
unicode_decode_error.start,
890+
unicode_decode_error.end,
891+
msg,
892+
)
874893

875894
if not file_paths and not config_dict:
876895
msg = (
@@ -983,15 +1002,35 @@ def insert_entity(resource, key, value):
9831002
return "_".join([*new_entities[0], f"{key}-{value}", *new_entities[1], suff])
9841003

9851004

986-
def load_yaml_config(config_filename, aws_input_creds):
1005+
def apply_modifications(
1006+
yaml_contents: str, modifications: Optional[list[Callable[[str], str]]]
1007+
) -> str:
1008+
"""Apply modification functions to YAML contents"""
1009+
if modifications:
1010+
for modification in modifications:
1011+
yaml_contents = modification(yaml_contents)
1012+
return yaml_contents
1013+
1014+
1015+
def load_yaml_config(
1016+
config_filename: str,
1017+
aws_input_creds,
1018+
modifications: Optional[list[Callable[[str], str]]] = None,
1019+
) -> dict | list | str:
1020+
"""Load a YAML config file, possibly from AWS, with modifications applied.
1021+
1022+
`modifications` should be a list of functions that take a single string argument (the loaded YAML contents) and return a single string argument (the modified YAML contents).
1023+
"""
9871024
if config_filename.lower().startswith("data:"):
9881025
try:
989-
header, encoded = config_filename.split(",", 1)
990-
config_content = b64decode(encoded)
1026+
_header, encoded = config_filename.split(",", 1)
1027+
config_content = apply_modifications(
1028+
b64decode(encoded).decode("utf-8"), modifications
1029+
)
9911030
return yaml.safe_load(config_content)
992-
except:
1031+
except Exception:
9931032
msg = f"Error! Could not find load config from data URI {config_filename}"
994-
raise BotoCoreError(msg)
1033+
raise SpecifiedBotoCoreError(msg=msg)
9951034

9961035
if config_filename.lower().startswith("s3://"):
9971036
# s3 paths begin with s3://bucket/
@@ -1013,7 +1052,8 @@ def load_yaml_config(config_filename, aws_input_creds):
10131052
config_filename = os.path.realpath(config_filename)
10141053

10151054
try:
1016-
return yaml.safe_load(open(config_filename, "r"))
1055+
with open(config_filename, "r") as _f:
1056+
return yaml.safe_load(apply_modifications(_f.read(), modifications))
10171057
except IOError:
10181058
msg = f"Error! Could not find config file {config_filename}"
10191059
raise FileNotFoundError(msg)
@@ -1110,6 +1150,25 @@ def create_cpac_data_config(
11101150
return sub_list
11111151

11121152

1153+
def _check_value_type(
1154+
sub_list: list[dict[str, Any]],
1155+
keys: list[str] = ["subject_id", "unique_id"],
1156+
value_type: type = int,
1157+
any_or_all: Callable[[Iterable], bool] = any,
1158+
) -> bool:
1159+
"""Check if any or all of a key in a sub_list is of a given type."""
1160+
return any_or_all(
1161+
isinstance(sub.get(key), value_type) for key in keys for sub in sub_list
1162+
)
1163+
1164+
1165+
def coerce_data_config_strings(contents: str) -> str:
1166+
"""Coerge `subject_id` and `unique_id` to be strings."""
1167+
for key in ["subject_id: ", "unique_id: "]:
1168+
contents = re.sub(f"{key}(?!!!)", f"{key}!!str ", contents)
1169+
return contents.replace(": !!str !!", ": !!")
1170+
1171+
11131172
def load_cpac_data_config(data_config_file, participant_labels, aws_input_creds):
11141173
"""
11151174
Loads the file as a check to make sure it is available and readable.
@@ -1127,7 +1186,9 @@ def load_cpac_data_config(data_config_file, participant_labels, aws_input_creds)
11271186
-------
11281187
list
11291188
"""
1130-
sub_list = load_yaml_config(data_config_file, aws_input_creds)
1189+
sub_list: list[dict[str, str]] = load_yaml_config(
1190+
data_config_file, aws_input_creds, modifications=[coerce_data_config_strings]
1191+
)
11311192

11321193
if participant_labels:
11331194
sub_list = [

CPAC/utils/tests/configs/__init__.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
11
"""Configs for testing."""
22

3-
from pathlib import Path
3+
from importlib import resources
4+
5+
try:
6+
from importlib.resources.abc import Traversable
7+
except ModuleNotFoundError: # TODO: Remove this block once minimum Python version includes `importlib.resources.abc`
8+
from importlib.abc import Traversable
49

5-
from pkg_resources import resource_filename
610
import yaml
711

8-
_TEST_CONFIGS_PATH = Path(resource_filename("CPAC", "utils/tests/configs"))
9-
with open(_TEST_CONFIGS_PATH / "neurostars_23786.yml", "r", encoding="utf-8") as _f:
12+
_TEST_CONFIGS_PATH: Traversable = resources.files("CPAC").joinpath(
13+
"utils/tests/configs"
14+
)
15+
with (_TEST_CONFIGS_PATH / "neurostars_23786.yml").open("r", encoding="utf-8") as _f:
1016
# A loaded YAML file to test https://tinyurl.com/neurostars23786
1117
NEUROSTARS_23786 = _f.read()
12-
with open(_TEST_CONFIGS_PATH / "neurostars_24035.yml", "r", encoding="utf-8") as _f:
18+
with (_TEST_CONFIGS_PATH / "neurostars_24035.yml").open("r", encoding="utf-8") as _f:
1319
# A loaded YAML file to test https://tinyurl.com/neurostars24035
1420
NEUROSTARS_24035 = _f.read()
1521
# A loaded YAML file to test https://tinyurl.com/cmicnlslack420349
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
- site: site-1
2+
subject_id: 01
3+
unique_id: 02
4+
derivatives_dir: /fprep/sub-0151
5+
- site: site-1
6+
subject_id: !!str 02
7+
unique_id: 02
8+
derivatives_dir: /fprep/sub-0151

CPAC/utils/tests/test_bids_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,21 @@
1616
# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
1717
"""Tests for bids_utils."""
1818

19+
from importlib import resources
1920
import os
2021
from subprocess import run
2122

2223
import pytest
2324
import yaml
2425

2526
from CPAC.utils.bids_utils import (
27+
_check_value_type,
2628
bids_gen_cpac_sublist,
2729
cl_strip_brackets,
2830
collect_bids_files_configs,
2931
create_cpac_data_config,
3032
load_cpac_data_config,
33+
load_yaml_config,
3134
sub_list_filter_by_labels,
3235
)
3336
from CPAC.utils.monitoring.custom_logging import getLogger
@@ -107,6 +110,19 @@ def test_gen_bids_sublist(bids_dir, test_yml, creds_path, dbg=False):
107110
assert sublist
108111

109112

113+
def test_load_data_config_with_ints() -> None:
114+
"""Check that C-PAC coerces sub- and ses- ints to strings."""
115+
data_config_file = resources.files("CPAC").joinpath(
116+
"utils/tests/configs/github_2144.yml"
117+
)
118+
# make sure there are ints in the test data
119+
assert _check_value_type(load_yaml_config(str(data_config_file), None))
120+
# make sure there aren't ints when it's loaded through the loader
121+
assert not _check_value_type(
122+
load_cpac_data_config(str(data_config_file), None, None)
123+
)
124+
125+
110126
@pytest.mark.parametrize("t1w_label", ["acq-HCP", "acq-VNavNorm", "T1w", None])
111127
@pytest.mark.parametrize(
112128
"bold_label", ["task-peer_run-1", "[task-peer_run-1 task-peer_run-2]", "bold", None]

0 commit comments

Comments
 (0)