diff --git a/.circleci/main.yml b/.circleci/main.yml
index a13300a78d..91add3529f 100644
--- a/.circleci/main.yml
+++ b/.circleci/main.yml
@@ -68,7 +68,9 @@ commands:
steps:
- run:
name: Getting Sample BIDS Data
- command: git clone https://github.com/bids-standard/bids-examples.git
+ command: |
+ mkdir -p /home/circleci/project/dev/circleci_data/.pytest_cache/d/bids-examples
+ git clone https://github.com/bids-standard/bids-examples.git /home/circleci/project/dev/circleci_data/.pytest_cache/d/bids-examples
get-singularity:
parameters:
version:
@@ -156,7 +158,7 @@ commands:
then
TAG=nightly
else
- TAG="${CIRCLE_BRANCH//\//_}"
+ TAG=`echo ${CIRCLE_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'`
fi
DOCKER_TAG="ghcr.io/${CIRCLE_PROJECT_USERNAME,,}/${CIRCLE_PROJECT_REPONAME,,}:${TAG,,}"
if [[ -n "<< parameters.variant >>" ]]
@@ -172,7 +174,7 @@ commands:
name: Testing Singularity installation
command: |
pip install -r dev/circleci_data/requirements.txt
- coverage run -m pytest --junitxml=test-results/junit.xml --continue-on-collection-errors dev/circleci_data/test_install.py
+ coverage run -m pytest --capture=no --junitxml=test-results/junit.xml --continue-on-collection-errors dev/circleci_data/test_install.py
jobs:
combine-coverage:
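Note: the tag sanitization above (and the matching changes in the GitHub workflow files below) no longer only converts `/` to `_`; it maps every character outside `[a-zA-Z0-9._]` to `-`, so characters that are not usable in an image tag (slashes, `#`, spaces, etc.) are all replaced. A minimal Python sketch of the same rule, with hypothetical branch names for illustration:

    import re

    def sanitize_tag(branch: str) -> str:
        """Mirror `sed 's/[^a-zA-Z0-9._]/-/g'`: keep letters, digits, '.' and '_';
        replace anything else (slashes, '#', spaces, ...) with '-'."""
        return re.sub(r"[^a-zA-Z0-9._]", "-", branch)

    # hypothetical branch names, for illustration only
    assert sanitize_tag("feature/new-engine") == "feature-new-engine"
    assert sanitize_tag("fix/issue#123") == "fix-issue-123"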
diff --git a/.github/workflows/build_C-PAC.yml b/.github/workflows/build_C-PAC.yml
index d126f6a778..ef7a196cef 100644
--- a/.github/workflows/build_C-PAC.yml
+++ b/.github/workflows/build_C-PAC.yml
@@ -42,7 +42,7 @@ jobs:
GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-)
if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]]
then
- TAG=${GITHUB_BRANCH//\//_}
+ TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'`
DOCKERFILE=.github/Dockerfiles/C-PAC.develop$VARIANT-$OS.Dockerfile
elif [[ $GITHUB_BRANCH == 'develop' ]]
then
diff --git a/.github/workflows/regression_test_full.yml b/.github/workflows/regression_test_full.yml
index 6dba2d1bf2..20d25a9316 100644
--- a/.github/workflows/regression_test_full.yml
+++ b/.github/workflows/regression_test_full.yml
@@ -13,7 +13,7 @@ jobs:
GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-)
if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]]
then
- TAG=${GITHUB_BRANCH//\//_}
+ TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'`
elif [[ $GITHUB_BRANCH == 'develop' ]]
then
TAG=nightly
diff --git a/.github/workflows/regression_test_lite.yml b/.github/workflows/regression_test_lite.yml
index 4e6b5a46f6..87aba8a5bd 100644
--- a/.github/workflows/regression_test_lite.yml
+++ b/.github/workflows/regression_test_lite.yml
@@ -37,7 +37,7 @@ jobs:
run: |
if [[ ! $GITHUB_REF_NAME == 'main' ]] && [[ ! $GITHUB_REF_NAME == 'develop' ]]
then
- TAG=${GITHUB_REF_NAME//\//_}
+ TAG=`echo ${GITHUB_REF_NAME} | sed 's/[^a-zA-Z0-9._]/-/g'`
elif [[ $GITHUB_REF_NAME == 'develop' ]]
then
TAG=nightly
diff --git a/.github/workflows/smoke_test_participant.yml b/.github/workflows/smoke_test_participant.yml
index 3fde0de8aa..6b7e219775 100644
--- a/.github/workflows/smoke_test_participant.yml
+++ b/.github/workflows/smoke_test_participant.yml
@@ -68,7 +68,7 @@ jobs:
GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-)
if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]]
then
- TAG=${GITHUB_BRANCH//\//_}
+ TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'`
elif [[ $GITHUB_BRANCH == 'develop' ]]
then
TAG=nightly
@@ -133,7 +133,7 @@ jobs:
GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-)
if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]]
then
- TAG=${GITHUB_BRANCH//\//_}
+ TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'`
elif [[ $GITHUB_BRANCH == 'develop' ]]
then
TAG=nightly
@@ -192,7 +192,7 @@ jobs:
GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-)
if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]]
then
- TAG=${GITHUB_BRANCH//\//_}
+ TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'`
elif [[ $GITHUB_BRANCH == 'develop' ]]
then
TAG=nightly
diff --git a/.ruff.toml b/.ruff.toml
index 265427a1ab..590d3baa47 100644
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -13,6 +13,7 @@ external = ["T20"] # Don't autoremove 'noqa` comments for these rules
"CPAC/utils/sklearn.py" = ["RUF003"]
"CPAC/utils/tests/old_functions.py" = ["C", "D", "E", "EM", "PLW", "RET"]
"CPAC/utils/utils.py" = ["T201"] # until `repickle` is removed
+"dev/circleci_data/conftest.py" = ["F401"]
"setup.py" = ["D1"]
[lint.flake8-import-conventions.extend-aliases]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index df8f40a666..a8bb98da0a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Moved `pygraphviz` from requirements to `graphviz` optional dependencies group.
+- Split `ResourcePool` into three classes: `Resource`, `ResourcePool`, and `StratPool`.
### Fixed
diff --git a/CPAC/alff/alff.py b/CPAC/alff/alff.py
index f8bfc1a0b8..e26342ffb5 100644
--- a/CPAC/alff/alff.py
+++ b/CPAC/alff/alff.py
@@ -22,7 +22,7 @@
from CPAC.alff.utils import get_opt_string
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.registration.registration import apply_transform
from CPAC.utils.interfaces import Function
from CPAC.utils.utils import check_prov_for_regtool
diff --git a/CPAC/anat_preproc/anat_preproc.py b/CPAC/anat_preproc/anat_preproc.py
index 0f4e770f97..5a6acd286e 100644
--- a/CPAC/anat_preproc/anat_preproc.py
+++ b/CPAC/anat_preproc/anat_preproc.py
@@ -34,7 +34,7 @@
wb_command,
)
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.utils.interfaces import Function
from CPAC.utils.interfaces.fsl import Merge as fslMerge
diff --git a/CPAC/conftest.py b/CPAC/conftest.py
new file mode 100644
index 0000000000..52113ebd40
--- /dev/null
+++ b/CPAC/conftest.py
@@ -0,0 +1,32 @@
+# Copyright (C) 2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""Global pytest configuration."""
+
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def bids_examples(cache: pytest.Cache) -> Path:
+ """Get cached example BIDS directories."""
+ bids_dir = cache.mkdir("bids-examples").absolute()
+ if not (bids_dir.exists() and list(bids_dir.iterdir())):
+ from git import Repo
+
+ Repo.clone_from("https://github.com/bids-standard/bids-examples.git", bids_dir)
+ return bids_dir
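The new `bids_examples` fixture is what the CircleCI change at the top of this diff pre-populates: pytest's `Cache.mkdir()` creates directories under the cache's `d/` subdirectory, so cloning into `dev/circleci_data/.pytest_cache/d/bids-examples` lets the fixture reuse an existing clone instead of hitting the network. A minimal sketch of how a test might consume it (the test and dataset names are hypothetical):

    from pathlib import Path


    def test_example_dataset_is_cached(bids_examples: Path) -> None:
        """`bids_examples` points at a local clone of bids-standard/bids-examples."""
        assert bids_examples.is_dir()
        # 'ds001' is one of the datasets shipped in bids-examples; chosen here
        # only for illustration.
        assert (bids_examples / "ds001").exists()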
diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py
index a7f0eaefcc..5f0728b628 100644
--- a/CPAC/distortion_correction/distortion_correction.py
+++ b/CPAC/distortion_correction/distortion_correction.py
@@ -32,7 +32,7 @@
run_fsl_topup,
)
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.utils import function
from CPAC.utils.datasource import match_epi_fmaps
from CPAC.utils.interfaces.function import Function
@@ -438,11 +438,6 @@ def distcor_blip_afni_qwarp(wf, cfg, strat_pool, pipe_num, opt=None):
node, out = strat_pool.get_data("pe-direction")
wf.connect(node, out, match_epi_fmaps_node, "bold_pedir")
- # interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'),
- # 'desc-brain_bold': 'opposite_pe_epi_brain'}
- # wf, strat_pool = wrap_block([bold_mask_afni, bold_masking],
- # interface, wf, cfg, strat_pool, pipe_num, opt)
-
func_get_brain_mask = pe.Node(
interface=preprocess.Automask(), name=f"afni_mask_opposite_pe_{pipe_num}"
)
@@ -530,10 +525,6 @@ def distcor_blip_afni_qwarp(wf, cfg, strat_pool, pipe_num, opt=None):
wf.connect(node, out, undistort_func_mean, "reference_image")
wf.connect(convert_afni_warp, "ants_warp", undistort_func_mean, "transforms")
- # interface = {'desc-preproc_bold': (undistort_func_mean, 'output_image')}
- # wf, strat_pool = wrap_block([bold_mask_afni],
- # interface, wf, cfg, strat_pool, pipe_num, opt)
-
remask = pe.Node(
interface=preprocess.Automask(), name=f"afni_remask_boldmask_{pipe_num}"
)
@@ -764,7 +755,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None):
wf.connect(run_topup, "out_jacs", vnum_base, "jac_matrix_list")
wf.connect(run_topup, "out_warps", vnum_base, "warp_field_list")
- mean_bold = strat_pool.node_data("sbref")
+ mean_bold = strat_pool.get_data("sbref")
flirt = pe.Node(interface=fsl.FLIRT(), name="flirt")
flirt.inputs.dof = 6
diff --git a/CPAC/func_preproc/func_ingress.py b/CPAC/func_preproc/func_ingress.py
index 60c8ccf5c9..2105503a19 100644
--- a/CPAC/func_preproc/func_ingress.py
+++ b/CPAC/func_preproc/func_ingress.py
@@ -14,12 +14,21 @@
# You should have received a copy of the GNU Lesser General Public
# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
-from CPAC.utils.datasource import create_func_datasource, ingress_func_metadata
+"""Ingress functional data for preprocessing."""
+
+from CPAC.utils.strategy import Strategy
def connect_func_ingress(
- workflow, strat_list, c, sub_dict, subject_id, input_creds_path, unique_id=None
+ workflow,
+ strat_list: list[Strategy],
+ c,
+ sub_dict,
+ subject_id,
+ input_creds_path,
+ unique_id=None,
):
+ """Connect functional ingress workflow."""
for num_strat, strat in enumerate(strat_list):
if "func" in sub_dict:
func_paths_dict = sub_dict["func"]
@@ -31,7 +40,9 @@ def connect_func_ingress(
else:
workflow_name = f"func_gather_{unique_id}_{num_strat}"
- func_wf = create_func_datasource(func_paths_dict, workflow_name)
+ func_wf = strat._resource_pool.create_func_datasource(
+ func_paths_dict, workflow_name
+ )
func_wf.inputs.inputnode.set(
subject=subject_id,
@@ -47,8 +58,6 @@ def connect_func_ingress(
}
)
- (workflow, strat.rpool, diff, blip, fmap_rp_list) = ingress_func_metadata(
- workflow, c, strat.rpool, sub_dict, subject_id, input_creds_path, unique_id
- )
+ diff, blip, fmap_rp_list = strat.rpool.ingress_func_metadata()
- return (workflow, diff, blip, fmap_rp_list)
+ return strat.rpool.wf, diff, blip, fmap_rp_list
diff --git a/CPAC/func_preproc/func_motion.py b/CPAC/func_preproc/func_motion.py
index bea7d2e29c..dfec8ab91c 100644
--- a/CPAC/func_preproc/func_motion.py
+++ b/CPAC/func_preproc/func_motion.py
@@ -31,7 +31,7 @@
motion_power_statistics,
)
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.pipeline.schema import valid_options
from CPAC.utils.interfaces.function import Function
from CPAC.utils.utils import check_prov_for_motion_tool
@@ -830,7 +830,7 @@ def motion_estimate_filter(wf, cfg, strat_pool, pipe_num, opt=None):
notch.inputs.lowpass_cutoff = opt.get("lowpass_cutoff")
notch.inputs.filter_order = opt.get("filter_order")
- movement_parameters = strat_pool.node_data("desc-movementParameters_motion")
+ movement_parameters = strat_pool.get_data("desc-movementParameters_motion")
wf.connect(
movement_parameters.node, movement_parameters.out, notch, "motion_params"
)
diff --git a/CPAC/func_preproc/func_preproc.py b/CPAC/func_preproc/func_preproc.py
index 7004b4f025..69b856509a 100644
--- a/CPAC/func_preproc/func_preproc.py
+++ b/CPAC/func_preproc/func_preproc.py
@@ -22,7 +22,7 @@
from CPAC.func_preproc.utils import nullify
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.utils.interfaces import Function
from CPAC.utils.interfaces.ants import (
AI, # niworkflows
@@ -993,7 +993,7 @@ def bold_mask_fsl_afni(wf, cfg, strat_pool, pipe_num, opt=None):
# and this function has been changed.
# CHANGES:
- # * Converted from a plain function to a CPAC.pipeline.nodeblock.NodeBlockFunction
+ # * Converted from a plain function to a CPAC.pipeline.engine.nodeblock.NodeBlockFunction
# * Removed Registration version check
# * Hardcoded Registration parameters instead of loading epi_atlasbased_brainmask.json
# * Uses C-PAC's ``FSL-AFNI-brain-probseg`` template in place of ``templateflow.api.get("MNI152NLin2009cAsym", resolution=1, label="brain", suffix="probseg")``
diff --git a/CPAC/func_preproc/tests/test_preproc_connections.py b/CPAC/func_preproc/tests/test_preproc_connections.py
index f58380a7fd..9b7da2ed4c 100644
--- a/CPAC/func_preproc/tests/test_preproc_connections.py
+++ b/CPAC/func_preproc/tests/test_preproc_connections.py
@@ -36,7 +36,6 @@
)
from CPAC.func_preproc.func_preproc import func_normalize
from CPAC.nuisance.nuisance import choose_nuisance_blocks
-from CPAC.pipeline.cpac_pipeline import connect_pipeline
from CPAC.pipeline.engine import ResourcePool
from CPAC.pipeline.nipype_pipeline_engine import Workflow
from CPAC.registration.registration import (
@@ -81,7 +80,7 @@
"from-template_to-T1w_mode-image_desc-linear_xfm",
]
-NUM_TESTS = 48 # number of parameterizations to run for many-parameter tests
+NUM_TESTS = 8 # number of parameterizations to run for many-parameter tests
def _filter_assertion_message(
@@ -268,7 +267,7 @@ def test_motion_filter_connections(
if not rpool.check_rpool("desc-cleaned_bold"):
pipeline_blocks += choose_nuisance_blocks(c, generate_only)
wf = Workflow(re.sub(r"[\[\]\-\:\_ \'\",]", "", str(rpool)))
- connect_pipeline(wf, c, rpool, pipeline_blocks)
+ rpool.connect_pipeline(wf, c, pipeline_blocks)
# Check that filtering is happening as expected
filter_switch_key = [
"functional_preproc",
diff --git a/CPAC/longitudinal_pipeline/longitudinal_workflow.py b/CPAC/longitudinal_pipeline/longitudinal_workflow.py
index 4229fc30c6..962d444a4e 100644
--- a/CPAC/longitudinal_pipeline/longitudinal_workflow.py
+++ b/CPAC/longitudinal_pipeline/longitudinal_workflow.py
@@ -21,17 +21,16 @@
import nipype.interfaces.io as nio
from indi_aws import aws_utils
+from CPAC.func_preproc.func_ingress import connect_func_ingress
from CPAC.longitudinal_pipeline.longitudinal_preproc import subject_specific_template
from CPAC.pipeline import nipype_pipeline_engine as pe
from CPAC.pipeline.cpac_pipeline import (
build_anat_preproc_stack,
build_segmentation_stack,
build_T1w_registration_stack,
- connect_pipeline,
- initialize_nipype_wf,
)
-from CPAC.pipeline.engine import ingress_output_dir, initiate_rpool
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine import ResourcePool
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.registration import (
create_fsl_flirt_linear_reg,
create_fsl_fnirt_nonlinear_reg,
@@ -428,16 +427,13 @@ def anat_longitudinal_wf(subject_id, sub_list, config):
except KeyError:
input_creds_path = None
- workflow = initialize_nipype_wf(
- config,
- sub_list[0],
- # just grab the first one for the name
- name="anat_longitudinal_pre-preproc",
+ rpool = ResourcePool(
+ cfg=config,
+ data_paths=session,
+ pipeline_name="anat_longitudinal_pre-preproc",
)
-
- workflow, rpool = initiate_rpool(workflow, config, session)
pipeline_blocks = build_anat_preproc_stack(rpool, config)
- workflow = connect_pipeline(workflow, config, rpool, pipeline_blocks)
+ workflow = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks)
session_wfs[unique_id] = rpool
@@ -473,13 +469,6 @@ def anat_longitudinal_wf(subject_id, sub_list, config):
)
for strat in strats_brain_dct.keys():
- wf = initialize_nipype_wf(
- config,
- sub_list[0],
- # just grab the first one for the name
- name=f"template_node_{strat}",
- )
-
config.pipeline_setup["pipeline_name"] = f"longitudinal_{orig_pipe_name}"
template_node_name = f"longitudinal_anat_template_{strat}"
@@ -507,9 +496,9 @@ def anat_longitudinal_wf(subject_id, sub_list, config):
template_node.inputs.input_skull_list = strats_head_dct[strat]
long_id = f"longitudinal_{subject_id}_strat-{strat}"
-
- wf, rpool = initiate_rpool(wf, config, part_id=long_id)
-
+ rpool = ResourcePool(
+ cfg=config, part_id=long_id, pipeline_name=f"template_node_{strat}"
+ )
rpool.set_data(
"space-longitudinal_desc-brain_T1w",
template_node,
@@ -552,7 +541,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config):
pipeline_blocks = build_segmentation_stack(rpool, config, pipeline_blocks)
- wf = connect_pipeline(wf, config, rpool, pipeline_blocks)
+ wf = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks)
excl = [
"space-longitudinal_desc-brain_T1w",
@@ -574,7 +563,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config):
creds_path = session["creds_path"]
if creds_path and "none" not in creds_path.lower():
if os.path.exists(creds_path):
- input_creds_path = os.path.abspath(creds_path)
+ session["creds_path"] = os.path.abspath(creds_path)
else:
err_msg = (
'Credentials path: "%s" for subject "%s" '
@@ -583,18 +572,14 @@ def anat_longitudinal_wf(subject_id, sub_list, config):
)
raise Exception(err_msg)
else:
- input_creds_path = None
+ session["creds_path"] = None
except KeyError:
- input_creds_path = None
-
- wf = initialize_nipype_wf(config, sub_list[0])
-
- wf, rpool = initiate_rpool(wf, config, session)
+ session["creds_path"] = None
config.pipeline_setup["pipeline_name"] = f"longitudinal_{orig_pipe_name}"
- rpool = ingress_output_dir(
- config, rpool, long_id, creds_path=input_creds_path
- )
+ rpool = ResourcePool(cfg=config, data_paths=session)
+ wf = rpool.wf
+ rpool.ingress_output_dir()
select_node_name = f"select_{unique_id}"
select_sess = pe.Node(
@@ -654,17 +639,14 @@ def anat_longitudinal_wf(subject_id, sub_list, config):
input_creds_path = None
except KeyError:
input_creds_path = None
-
- wf = initialize_nipype_wf(config, sub_list[0])
-
- wf, rpool = initiate_rpool(wf, config, session)
-
+ session["creds_path"] = input_creds_path
+ rpool = ResourcePool(cfg=config, data_paths=session)
pipeline_blocks = [
warp_longitudinal_T1w_to_template,
warp_longitudinal_seg_to_T1w,
]
- wf = connect_pipeline(wf, config, rpool, pipeline_blocks)
+ wf = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks)
rpool.gather_pipes(wf, config)
diff --git a/CPAC/network_centrality/pipeline.py b/CPAC/network_centrality/pipeline.py
index e486f8eff0..407489fd9f 100644
--- a/CPAC/network_centrality/pipeline.py
+++ b/CPAC/network_centrality/pipeline.py
@@ -19,7 +19,7 @@
from CPAC.network_centrality.network_centrality import create_centrality_wf
from CPAC.network_centrality.utils import check_centrality_params, create_merge_node
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.pipeline.schema import valid_options
diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py
index 45337a0c23..04807755b7 100644
--- a/CPAC/nuisance/nuisance.py
+++ b/CPAC/nuisance/nuisance.py
@@ -37,8 +37,8 @@
TR_string_to_float,
)
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.engine import ResourcePool
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
+from CPAC.pipeline.engine.resource import StratPool
from CPAC.registration.registration import (
apply_transform,
warp_timeseries_to_EPItemplate,
@@ -2363,7 +2363,7 @@ def erode_mask_WM(wf, cfg, strat_pool, pipe_num, opt=None):
outputs=["desc-confounds_timeseries", "censor-indices"],
)
def nuisance_regressors_generation_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None):
- return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, opt, "bold")
+ return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, "bold")
@nodeblock(
@@ -2407,40 +2407,22 @@ def nuisance_regressors_generation_EPItemplate(wf, cfg, strat_pool, pipe_num, op
outputs=["desc-confounds_timeseries", "censor-indices"],
)
def nuisance_regressors_generation_T1w(wf, cfg, strat_pool, pipe_num, opt=None):
- return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, opt, "T1w")
+ return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, "T1w")
def nuisance_regressors_generation(
wf: Workflow,
cfg: Configuration,
- strat_pool: ResourcePool,
+ strat_pool: StratPool,
pipe_num: int,
- opt: dict,
space: Literal["T1w", "bold"],
) -> tuple[Workflow, dict]:
- """Generate nuisance regressors.
-
- Parameters
- ----------
- wf : ~nipype.pipeline.engine.workflows.Workflow
-
- cfg : ~CPAC.utils.configuration.Configuration
-
- strat_pool : ~CPAC.pipeline.engine.ResourcePool
-
- pipe_num : int
-
- opt : dict
-
- space : str
- T1w or bold
-
- Returns
- -------
- wf : nipype.pipeline.engine.workflows.Workflow
-
- outputs : dict
- """
+ """Generate nuisance regressors."""
+ try:
+ opt = strat_pool.regressor_dct
+ except LookupError:
+ # no regressors to generate
+ return wf, {}
prefixes = [f"space-{space}_"] * 2
reg_tool = None
if space == "T1w":
@@ -2664,7 +2646,7 @@ def nuisance_regressors_generation(
return (wf, outputs)
-def nuisance_regression(wf, cfg, strat_pool, pipe_num, opt, space, res=None):
+def nuisance_regression(wf, cfg, strat_pool: StratPool, pipe_num, opt, space, res=None):
"""Nuisance regression in native (BOLD) or template space.
Parameters
@@ -2681,7 +2663,11 @@ def nuisance_regression(wf, cfg, strat_pool, pipe_num, opt, space, res=None):
outputs : dict
"""
- opt = strat_pool.regressor_dct(cfg)
+ try:
+ opt = strat_pool.regressor_dct
+ except LookupError:
+ # no regressors
+ return wf, {}
bandpass = "Bandpass" in opt
bandpass_before = (
bandpass
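Both regressor node blocks above now rely on the same guard: `regressor_dct`, previously a method taking `cfg`, is accessed as a property of `StratPool`, and a `LookupError` is treated as "no regressors configured," short-circuiting the block. A minimal sketch of that pattern, assuming only what the diff shows:

    def regression_nodeblock(wf, cfg, strat_pool, pipe_num, opt=None):
        """Sketch of the guard both regressor node blocks above now use."""
        try:
            # StratPool.regressor_dct is assumed to raise LookupError when no
            # regressor strategy applies to this strat_pool.
            opt = strat_pool.regressor_dct
        except LookupError:
            return wf, {}  # nothing to regress; pass the workflow through
        outputs = {}  # populated by the real node block using `opt`
        return wf, outputs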
diff --git a/CPAC/pipeline/cpac_pipeline.py b/CPAC/pipeline/cpac_pipeline.py
index 40811b9e77..4e92fe7f45 100644
--- a/CPAC/pipeline/cpac_pipeline.py
+++ b/CPAC/pipeline/cpac_pipeline.py
@@ -25,12 +25,16 @@
import sys
import time
from time import strftime
+from typing import Any
import yaml
-import nipype
+import nipype # type: ignore [import-untyped]
from nipype import config, logging
-from flowdump import save_workflow_json, WorkflowJSONMeta
-from indi_aws import aws_utils, fetch_creds
+from flowdump import ( # type: ignore [import-untyped]
+ save_workflow_json,
+ WorkflowJSONMeta,
+)
+from indi_aws import aws_utils, fetch_creds # type: ignore [import-untyped]
import CPAC
from CPAC.alff.alff import alff_falff, alff_falff_space_template
@@ -128,9 +132,8 @@
)
# pylint: disable=wrong-import-order
-from CPAC.pipeline import nipype_pipeline_engine as pe
from CPAC.pipeline.check_outputs import check_outputs
-from CPAC.pipeline.engine import initiate_rpool, NodeBlock
+from CPAC.pipeline.engine import ResourcePool
from CPAC.pipeline.nipype_pipeline_engine.plugins import (
LegacyMultiProcPlugin,
MultiProcPlugin,
@@ -197,12 +200,9 @@
from CPAC.utils.docs import version_report
from CPAC.utils.monitoring import (
FMLOGGER,
- getLogger,
log_nodes_cb,
log_nodes_initial,
- LOGTAIL,
set_up_logger,
- WARNING_FREESURFER_OFF_WITH_DATA,
WFLOGGER,
)
from CPAC.utils.monitoring.draw_gantt_chart import resource_report
@@ -422,7 +422,7 @@ def run_workflow(
license_notice=CPAC.license_notice.replace("\n", "\n "),
),
)
- subject_info = {}
+ subject_info: dict[str, Any] = {}
subject_info["subject_id"] = subject_id
subject_info["start_time"] = pipeline_start_time
@@ -560,7 +560,7 @@ def run_workflow(
# for strat_no, strat in enumerate(strat_list):
# strat_label = 'strat_%d' % strat_no
- # subject_info[strat_label] = strat.get_name()
+ # subject_info[strat_label] = strat.name
# subject_info['resource_pool'].append(strat.get_resource_pool())
subject_info["status"] = "Running"
@@ -710,21 +710,24 @@ def run_workflow(
]
timeHeader = dict(zip(gpaTimeFields, gpaTimeFields))
- with open(
- os.path.join(
- c.pipeline_setup["log_directory"]["path"],
- "cpac_individual_timing"
- f"_{c.pipeline_setup['pipeline_name']}.csv",
- ),
- "a",
- ) as timeCSV, open(
- os.path.join(
- c.pipeline_setup["log_directory"]["path"],
- "cpac_individual_timing_%s.csv"
- % c.pipeline_setup["pipeline_name"],
- ),
- "r",
- ) as readTimeCSV:
+ with (
+ open(
+ os.path.join(
+ c.pipeline_setup["log_directory"]["path"],
+ "cpac_individual_timing"
+ f"_{c.pipeline_setup['pipeline_name']}.csv",
+ ),
+ "a",
+ ) as timeCSV,
+ open(
+ os.path.join(
+ c.pipeline_setup["log_directory"]["path"],
+ "cpac_individual_timing_%s.csv"
+ % c.pipeline_setup["pipeline_name"],
+ ),
+ "r",
+ ) as readTimeCSV,
+ ):
timeWriter = csv.DictWriter(timeCSV, fieldnames=gpaTimeFields)
timeReader = csv.DictReader(readTimeCSV)
@@ -853,24 +856,6 @@ def remove_workdir(wdpath: str) -> None:
FMLOGGER.warning("Could not remove working directory %s", wdpath)
-def initialize_nipype_wf(cfg, sub_data_dct, name=""):
- """Initialize a new nipype workflow."""
- if name:
- name = f"_{name}"
-
- workflow_name = (
- f'cpac{name}_{sub_data_dct["subject_id"]}_{sub_data_dct["unique_id"]}'
- )
- wf = pe.Workflow(name=workflow_name)
- wf.base_dir = cfg.pipeline_setup["working_directory"]["path"]
- wf.config["execution"] = {
- "hash_method": "timestamp",
- "crashdump_dir": os.path.abspath(cfg.pipeline_setup["log_directory"]["path"]),
- }
-
- return wf
-
-
def load_cpac_pipe_config(pipe_config):
"""Load in pipeline config file."""
config_file = os.path.realpath(pipe_config)
@@ -1071,7 +1056,6 @@ def build_T1w_registration_stack(rpool, cfg, pipeline_blocks=None):
warp_wholeheadT1_to_template,
warp_T1mask_to_template,
]
-
if not rpool.check_rpool("desc-restore-brain_T1w"):
reg_blocks.append(correct_restore_brain_intensity_abcd)
@@ -1128,99 +1112,10 @@ def build_segmentation_stack(rpool, cfg, pipeline_blocks=None):
return pipeline_blocks
-def list_blocks(pipeline_blocks, indent=None):
- """List node blocks line by line.
-
- Parameters
- ----------
- pipeline_blocks : list or tuple
-
- indent : int or None
- number of spaces after a tab indent
-
- Returns
- -------
- str
- """
- blockstring = yaml.dump(
- [
- getattr(
- block,
- "__name__",
- getattr(
- block,
- "name",
- yaml.safe_load(list_blocks(list(block)))
- if isinstance(block, (tuple, list, set))
- else str(block),
- ),
- )
- for block in pipeline_blocks
- ]
- )
- if isinstance(indent, int):
- blockstring = "\n".join(
- [
- "\t" + " " * indent + line.replace("- - ", "- ")
- for line in blockstring.split("\n")
- ]
- )
- return blockstring
-
-
-def connect_pipeline(wf, cfg, rpool, pipeline_blocks):
- """Connect the pipeline blocks to the workflow."""
- WFLOGGER.info(
- "Connecting pipeline blocks:\n%s", list_blocks(pipeline_blocks, indent=1)
- )
-
- previous_nb = None
- for block in pipeline_blocks:
- try:
- nb = NodeBlock(block, debug=cfg["pipeline_setup", "Debugging", "verbose"])
- wf = nb.connect_block(wf, cfg, rpool)
- except LookupError as e:
- if nb.name == "freesurfer_postproc":
- WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA)
- LOGTAIL["warnings"].append(WARNING_FREESURFER_OFF_WITH_DATA)
- continue
- previous_nb_str = (
- (f"after node block '{previous_nb.get_name()}':")
- if previous_nb
- else "at beginning:"
- )
- # Alert user to block that raises error
- if isinstance(block, list):
- node_block_names = str([NodeBlock(b).get_name() for b in block])
- e.args = (
- f"When trying to connect one of the node blocks "
- f"{node_block_names} "
- f"to workflow '{wf}' {previous_nb_str} {e.args[0]}",
- )
- else:
- node_block_names = NodeBlock(block).get_name()
- e.args = (
- f"When trying to connect node block "
- f"'{node_block_names}' "
- f"to workflow '{wf}' {previous_nb_str} {e.args[0]}",
- )
- if cfg.pipeline_setup["Debugging"]["verbose"]:
- verbose_logger = getLogger("CPAC.engine")
- verbose_logger.debug(e.args[0])
- verbose_logger.debug(rpool)
- raise
- previous_nb = nb
-
- return wf
-
-
def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None):
"""Build a C-PAC workflow for a single subject."""
from CPAC.utils.datasource import gather_extraction_maps
- # Workflow setup
- wf = initialize_nipype_wf(cfg, sub_dict, name=pipeline_name)
-
# Extract credentials path if it exists
try:
creds_path = sub_dict["creds_path"]
@@ -1244,8 +1139,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None):
# PREPROCESSING
# """""""""""""""""""""""""""""""""""""""""""""""""""
- wf, rpool = initiate_rpool(wf, cfg, sub_dict)
-
+ rpool = ResourcePool(cfg=cfg, data_paths=sub_dict, pipeline_name=pipeline_name)
pipeline_blocks = build_anat_preproc_stack(rpool, cfg)
# Anatomical to T1 template registration
@@ -1437,7 +1331,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None):
if rpool.check_rpool(func):
apply_func_warp["T1"] = False
- target_space_nuis = cfg.nuisance_corrections["2-nuisance_regression"]["space"]
+ # target_space_nuis = cfg.nuisance_corrections["2-nuisance_regression"]["space"]
target_space_alff = cfg.amplitude_low_frequency_fluctuation["target_space"]
target_space_reho = cfg.regional_homogeneity["target_space"]
@@ -1612,7 +1506,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None):
# Connect the entire pipeline!
try:
- wf = connect_pipeline(wf, cfg, rpool, pipeline_blocks)
+ wf = rpool.connect_pipeline(rpool.wf, cfg, pipeline_blocks)
except LookupError as lookup_error:
missing_key = None
errorstrings = [arg for arg in lookup_error.args[0].split("\n") if arg.strip()]
diff --git a/CPAC/pipeline/engine.py b/CPAC/pipeline/engine.py
deleted file mode 100644
index d7f53f7029..0000000000
--- a/CPAC/pipeline/engine.py
+++ /dev/null
@@ -1,2761 +0,0 @@
-# Copyright (C) 2021-2024 C-PAC Developers
-
-# This file is part of C-PAC.
-
-# C-PAC is free software: you can redistribute it and/or modify it under
-# the terms of the GNU Lesser General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or (at your
-# option) any later version.
-
-# C-PAC is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public
-# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
-import ast
-import copy
-import hashlib
-from itertools import chain
-import json
-import os
-import re
-from typing import Optional
-import warnings
-
-from nipype import config, logging
-from nipype.interfaces.utility import Rename
-
-from CPAC.image_utils.spatial_smoothing import spatial_smoothing
-from CPAC.image_utils.statistical_transforms import (
- fisher_z_score_standardize,
- z_score_standardize,
-)
-from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.check_outputs import ExpectedOutputs
-from CPAC.pipeline.nodeblock import NodeBlockFunction
-from CPAC.pipeline.utils import MOVEMENT_FILTER_KEYS, name_fork, source_set
-from CPAC.registration.registration import transform_derivative
-from CPAC.resources.templates.lookup_table import lookup_identifier
-from CPAC.utils.bids_utils import res_in_filename
-from CPAC.utils.configuration import Configuration
-from CPAC.utils.datasource import (
- create_anat_datasource,
- create_func_datasource,
- create_general_datasource,
- ingress_func_metadata,
- resolve_resolution,
-)
-from CPAC.utils.interfaces.datasink import DataSink
-from CPAC.utils.interfaces.function import Function
-from CPAC.utils.monitoring import (
- getLogger,
- LOGTAIL,
- WARNING_FREESURFER_OFF_WITH_DATA,
- WFLOGGER,
-)
-from CPAC.utils.outputs import Outputs
-from CPAC.utils.utils import (
- check_prov_for_regtool,
- create_id_string,
- get_last_prov_entry,
- read_json,
- write_output_json,
-)
-
-
-class ResourcePool:
- def __init__(self, rpool=None, name=None, cfg=None, pipe_list=None):
- if not rpool:
- self.rpool = {}
- else:
- self.rpool = rpool
-
- if not pipe_list:
- self.pipe_list = []
- else:
- self.pipe_list = pipe_list
-
- self.name = name
- self.info = {}
-
- if cfg:
- self.cfg = cfg
- self.logdir = cfg.pipeline_setup["log_directory"]["path"]
-
- self.num_cpus = cfg.pipeline_setup["system_config"][
- "max_cores_per_participant"
- ]
- self.num_ants_cores = cfg.pipeline_setup["system_config"][
- "num_ants_threads"
- ]
-
- self.ants_interp = cfg.registration_workflows["functional_registration"][
- "func_registration_to_template"
- ]["ANTs_pipelines"]["interpolation"]
- self.fsl_interp = cfg.registration_workflows["functional_registration"][
- "func_registration_to_template"
- ]["FNIRT_pipelines"]["interpolation"]
-
- self.func_reg = cfg.registration_workflows["functional_registration"][
- "func_registration_to_template"
- ]["run"]
-
- self.run_smoothing = (
- "smoothed" in cfg.post_processing["spatial_smoothing"]["output"]
- )
- self.smoothing_bool = cfg.post_processing["spatial_smoothing"]["run"]
- self.run_zscoring = "z-scored" in cfg.post_processing["z-scoring"]["output"]
- self.zscoring_bool = cfg.post_processing["z-scoring"]["run"]
- self.fwhm = cfg.post_processing["spatial_smoothing"]["fwhm"]
- self.smooth_opts = cfg.post_processing["spatial_smoothing"][
- "smoothing_method"
- ]
-
- self.xfm = [
- "alff",
- "desc-sm_alff",
- "desc-zstd_alff",
- "desc-sm-zstd_alff",
- "falff",
- "desc-sm_falff",
- "desc-zstd_falff",
- "desc-sm-zstd_falff",
- "reho",
- "desc-sm_reho",
- "desc-zstd_reho",
- "desc-sm-zstd_reho",
- ]
-
- def __repr__(self) -> str:
- params = [
- f"{param}={getattr(self, param)}"
- for param in ["rpool", "name", "cfg", "pipe_list"]
- if getattr(self, param, None) is not None
- ]
- return f'ResourcePool({", ".join(params)})'
-
- def __str__(self) -> str:
- if self.name:
- return f"ResourcePool({self.name}): {list(self.rpool)}"
- return f"ResourcePool: {list(self.rpool)}"
-
- def append_name(self, name):
- self.name.append(name)
-
- def back_propogate_template_name(
- self, wf, resource_idx: str, json_info: dict, id_string: "pe.Node"
- ) -> None:
- """Find and apply the template name from a resource's provenance.
-
- Parameters
- ----------
- resource_idx : str
-
- json_info : dict
-
- id_string : pe.Node
-
- Returns
- -------
- None
- """
- if "template" in resource_idx and self.check_rpool("derivatives-dir"):
- if self.check_rpool("template"):
- node, out = self.get_data("template")
- wf.connect(node, out, id_string, "template_desc")
- elif "Template" in json_info:
- id_string.inputs.template_desc = json_info["Template"]
- elif (
- "template" in resource_idx and len(json_info.get("CpacProvenance", [])) > 1
- ):
- for resource in source_set(json_info["CpacProvenance"]):
- source, value = resource.split(":", 1)
- if value.startswith("template_") and source != "FSL-AFNI-bold-ref":
- # 'FSL-AFNI-bold-ref' is currently allowed to be in
- # a different space, so don't use it as the space for
- # descendents
- try:
- anscestor_json = next(iter(self.rpool.get(source).items()))[
- 1
- ].get("json", {})
- if "Description" in anscestor_json:
- id_string.inputs.template_desc = anscestor_json[
- "Description"
- ]
- return
- except (IndexError, KeyError):
- pass
- return
-
- def get_name(self):
- return self.name
-
- def check_rpool(self, resource):
- if not isinstance(resource, list):
- resource = [resource]
- for name in resource:
- if name in self.rpool:
- return True
- return False
-
- def get_pipe_number(self, pipe_idx):
- return self.pipe_list.index(pipe_idx)
-
- def get_pool_info(self):
- return self.info
-
- def set_pool_info(self, info_dct):
- self.info.update(info_dct)
-
- def get_entire_rpool(self):
- return self.rpool
-
- def get_resources(self):
- return self.rpool.keys()
-
- def copy_rpool(self):
- return ResourcePool(
- rpool=copy.deepcopy(self.get_entire_rpool()),
- name=self.name,
- cfg=self.cfg,
- pipe_list=copy.deepcopy(self.pipe_list),
- )
-
- @staticmethod
- def get_raw_label(resource: str) -> str:
- """Remove ``desc-*`` label."""
- for tag in resource.split("_"):
- if "desc-" in tag:
- resource = resource.replace(f"{tag}_", "")
- break
- return resource
-
- def get_strat_info(self, prov, label=None, logdir=None):
- strat_info = {}
- for entry in prov:
- if isinstance(entry, list):
- strat_info[entry[-1].split(":")[0]] = entry
- elif isinstance(entry, str):
- strat_info[entry.split(":")[0]] = entry.split(":")[1]
- if label:
- if not logdir:
- logdir = self.logdir
- WFLOGGER.info(
- "\n\nPrinting out strategy info for %s in %s\n", label, logdir
- )
- write_output_json(
- strat_info, f"{label}_strat_info", indent=4, basedir=logdir
- )
-
- def set_json_info(self, resource, pipe_idx, key, val):
- # TODO: actually should probably be able to inititialize resource/pipe_idx
- if pipe_idx not in self.rpool[resource]:
- msg = (
- "\n[!] DEV: The pipeline/strat ID does not exist "
- f"in the resource pool.\nResource: {resource}"
- f"Pipe idx: {pipe_idx}\nKey: {key}\nVal: {val}\n"
- )
- raise Exception(msg)
- if "json" not in self.rpool[resource][pipe_idx]:
- self.rpool[resource][pipe_idx]["json"] = {}
- self.rpool[resource][pipe_idx]["json"][key] = val
-
- def get_json_info(self, resource, pipe_idx, key):
- # TODO: key checks
- if not pipe_idx:
- for pipe_idx, val in self.rpool[resource].items():
- return val["json"][key]
- return self.rpool[resource][pipe_idx][key]
-
- @staticmethod
- def get_resource_from_prov(prov):
- # each resource (i.e. "desc-cleaned_bold" AKA nuisance-regressed BOLD
- # data) has its own provenance list. the name of the resource, and
- # the node that produced it, is always the last item in the provenance
- # list, with the two separated by a colon :
- if not len(prov):
- return None
- if isinstance(prov[-1], list):
- return prov[-1][-1].split(":")[0]
- if isinstance(prov[-1], str):
- return prov[-1].split(":")[0]
- return None
-
- def regressor_dct(self, cfg) -> dict:
- """Return the regressor dictionary for the current strategy if one exists.
-
- Raises KeyError otherwise.
- """
- # pylint: disable=attribute-defined-outside-init
- if hasattr(self, "_regressor_dct"): # memoized
- # pylint: disable=access-member-before-definition
- return self._regressor_dct
- key_error = KeyError(
- "[!] No regressors in resource pool. \n\n"
- "Try turning on create_regressors or "
- "ingress_regressors."
- )
- _nr = cfg["nuisance_corrections", "2-nuisance_regression"]
- if not hasattr(self, "timeseries"):
- if _nr["Regressors"]:
- self.regressors = {reg["Name"]: reg for reg in _nr["Regressors"]}
- else:
- self.regressors = []
- if self.check_rpool("parsed_regressors"): # ingressed regressor
- # name regressor workflow without regressor_prov
- strat_name = _nr["ingress_regressors"]["Regressors"]["Name"]
- if strat_name in self.regressors:
- self._regressor_dct = self.regressors[strat_name]
- return self._regressor_dct
- self.regressor_dct = _nr["ingress_regressors"]["Regressors"]
- return self.regressor_dct
- prov = self.get_cpac_provenance("desc-confounds_timeseries")
- strat_name_components = prov[-1].split("_")
- for _ in list(range(prov[-1].count("_"))):
- reg_name = "_".join(strat_name_components[-_:])
- if reg_name in self.regressors:
- self._regressor_dct = self.regressors[reg_name]
- return self._regressor_dct
- raise key_error
-
- def set_data(
- self,
- resource,
- node,
- output,
- json_info,
- pipe_idx,
- node_name,
- fork=False,
- inject=False,
- ):
- json_info = json_info.copy()
- cpac_prov = []
- if "CpacProvenance" in json_info:
- cpac_prov = json_info["CpacProvenance"]
- current_prov_list = list(cpac_prov)
- new_prov_list = list(cpac_prov) # <---- making a copy, it was already a list
- if not inject:
- new_prov_list.append(f"{resource}:{node_name}")
- try:
- res, new_pipe_idx = self.generate_prov_string(new_prov_list)
- except IndexError:
- msg = (
- f"\n\nThe set_data() call for {resource} has no "
- "provenance information and should not be an "
- "injection."
- )
- raise IndexError(msg)
- if not json_info:
- json_info = {
- "RawSources": [
- resource # <---- this will be repopulated to the full file path at the end of the pipeline building, in gather_pipes()
- ]
- }
- json_info["CpacProvenance"] = new_prov_list
-
- if resource not in self.rpool.keys():
- self.rpool[resource] = {}
- elif not fork: # <--- in the event of multiple strategies/options, this will run for every option; just keep in mind
- search = False
- if self.get_resource_from_prov(current_prov_list) == resource:
- # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION
- pipe_idx = self.generate_prov_string(current_prov_list)[1]
- if pipe_idx not in self.rpool[resource].keys():
- search = True
- else:
- search = True
- if search:
- for idx in current_prov_list:
- if self.get_resource_from_prov(idx) == resource:
- if isinstance(idx, list):
- # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION
- pipe_idx = self.generate_prov_string(idx)[1]
- elif isinstance(idx, str):
- pipe_idx = idx
- break
- if pipe_idx in self.rpool[resource].keys():
- # in case the resource name is now new, and not the original
- # remove old keys so we don't end up with a new strat for every new node unit (unless we fork)
- del self.rpool[resource][pipe_idx]
- if new_pipe_idx not in self.rpool[resource]:
- self.rpool[resource][new_pipe_idx] = {}
- if new_pipe_idx not in self.pipe_list:
- self.pipe_list.append(new_pipe_idx)
-
- self.rpool[resource][new_pipe_idx]["data"] = (node, output)
- self.rpool[resource][new_pipe_idx]["json"] = json_info
-
- def get(
- self,
- resource: list[str] | str,
- pipe_idx: Optional[str] = None,
- report_fetched: Optional[bool] = False,
- optional: Optional[bool] = False,
- ) -> tuple[Optional[dict], Optional[str]] | Optional[dict]:
- # NOTE!!!
- # if this is the main rpool, this will return a dictionary of strats, and inside those, are dictionaries like {'data': (node, out), 'json': info}
- # BUT, if this is a sub rpool (i.e. a strat_pool), this will return a one-level dictionary of {'data': (node, out), 'json': info} WITHOUT THE LEVEL OF STRAT KEYS ABOVE IT
- if not isinstance(resource, list):
- resource = [resource]
- # if a list of potential inputs are given, pick the first one found
- for label in resource:
- if label in self.rpool.keys():
- _found = self.rpool[label]
- if pipe_idx:
- _found = _found[pipe_idx]
- if report_fetched:
- return _found, label
- return _found
- if optional:
- if report_fetched:
- return (None, None)
- return None
- msg = (
- "\n\n[!] C-PAC says: None of the listed resources are in "
- f"the resource pool:\n\n {resource}\n\nOptions:\n- You "
- "can enable a node block earlier in the pipeline which "
- "produces these resources. Check the 'outputs:' field in "
- "a node block's documentation.\n- You can directly "
- "provide this required data by pulling it from another "
- "BIDS directory using 'source_outputs_dir:' in the "
- "pipeline configuration, or by placing it directly in "
- "your C-PAC output directory.\n- If you have done these, "
- "and you still get this message, please let us know "
- "through any of our support channels at: "
- "https://fcp-indi.github.io/\n"
- )
- raise LookupError(msg)
-
- def get_data(
- self, resource, pipe_idx=None, report_fetched=False, quick_single=False
- ):
- if report_fetched:
- if pipe_idx:
- connect, fetched = self.get(
- resource, pipe_idx=pipe_idx, report_fetched=report_fetched
- )
- return (connect["data"], fetched)
- connect, fetched = self.get(resource, report_fetched=report_fetched)
- return (connect["data"], fetched)
- if pipe_idx:
- return self.get(resource, pipe_idx=pipe_idx)["data"]
- if quick_single or len(self.get(resource)) == 1:
- for _key, val in self.get(resource).items():
- return val["data"]
- return self.get(resource)["data"]
-
- def copy_resource(self, resource, new_name):
- try:
- self.rpool[new_name] = self.rpool[resource]
- except KeyError:
- msg = f"[!] {resource} not in the resource pool."
- raise Exception(msg)
-
- def update_resource(self, resource, new_name):
- # move over any new pipe_idx's
- self.rpool[new_name].update(self.rpool[resource])
-
- def get_pipe_idxs(self, resource):
- return self.rpool[resource].keys()
-
- def get_json(self, resource, strat=None):
- # NOTE: resource_strat_dct has to be entered properly by the developer
- # it has to either be rpool[resource][strat] or strat_pool[resource]
- if strat:
- resource_strat_dct = self.rpool[resource][strat]
- else:
- # for strat_pools mainly, where there is no 'strat' key level
- resource_strat_dct = self.rpool[resource]
-
- # TODO: the below hits the exception if you use get_cpac_provenance on
- # TODO: the main rpool (i.e. if strat=None)
- if "json" in resource_strat_dct:
- strat_json = resource_strat_dct["json"]
- else:
- msg = (
- "\n[!] Developer info: the JSON "
- f"information for {resource} and {strat} "
- f"is incomplete.\n"
- )
- raise Exception(msg)
- return strat_json
-
- def get_cpac_provenance(self, resource, strat=None):
- # NOTE: resource_strat_dct has to be entered properly by the developer
- # it has to either be rpool[resource][strat] or strat_pool[resource]
- if isinstance(resource, list):
- for _resource in resource:
- try:
- return self.get_cpac_provenance(_resource, strat)
- except KeyError:
- continue
- json_data = self.get_json(resource, strat)
- return json_data["CpacProvenance"]
-
- @staticmethod
- def generate_prov_string(prov):
- # this will generate a string from a SINGLE RESOURCE'S dictionary of
- # MULTIPLE PRECEDING RESOURCES (or single, if just one)
- # NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation)
- if not isinstance(prov, list):
- msg = (
- "\n[!] Developer info: the CpacProvenance "
- f"entry for {prov} has to be a list.\n"
- )
- raise TypeError(msg)
- last_entry = get_last_prov_entry(prov)
- resource = last_entry.split(":")[0]
- return (resource, str(prov))
-
- @staticmethod
- def generate_prov_list(prov_str):
- if not isinstance(prov_str, str):
- msg = (
- "\n[!] Developer info: the CpacProvenance "
- f"entry for {prov_str!s} has to be a string.\n"
- )
- raise TypeError(msg)
- return ast.literal_eval(prov_str)
-
- @staticmethod
- def get_resource_strats_from_prov(prov):
- # if you provide the provenance of a resource pool output, this will
- # return a dictionary of all the preceding resource pool entries that
- # led to that one specific output:
- # {rpool entry}: {that entry's provenance}
- # {rpool entry}: {that entry's provenance}
- resource_strat_dct = {}
- if isinstance(prov, str):
- resource = prov.split(":")[0]
- resource_strat_dct[resource] = prov
- else:
- for spot, entry in enumerate(prov):
- if isinstance(entry, list):
- resource = entry[-1].split(":")[0]
- resource_strat_dct[resource] = entry
- elif isinstance(entry, str):
- resource = entry.split(":")[0]
- resource_strat_dct[resource] = entry
- return resource_strat_dct
-
- def flatten_prov(self, prov):
- if isinstance(prov, str):
- return [prov]
- if isinstance(prov, list):
- flat_prov = []
- for entry in prov:
- if isinstance(entry, list):
- flat_prov += self.flatten_prov(entry)
- else:
- flat_prov.append(entry)
- return flat_prov
- return None
-
- def get_strats(self, resources, debug=False):
- # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS
- # TODO: (and it doesn't have to be)
-
- import itertools
-
- linked_resources = []
- resource_list = []
- if debug:
- verbose_logger = getLogger("CPAC.engine")
- verbose_logger.debug("\nresources: %s", resources)
- for resource in resources:
- # grab the linked-input tuples
- if isinstance(resource, tuple):
- linked = []
- for label in list(resource):
- rp_dct, fetched_resource = self.get(
- label, report_fetched=True, optional=True
- )
- if not rp_dct:
- continue
- linked.append(fetched_resource)
- resource_list += linked
- if len(linked) < 2: # noqa: PLR2004
- continue
- linked_resources.append(linked)
- else:
- resource_list.append(resource)
-
- total_pool = []
- variant_pool = {}
- len_inputs = len(resource_list)
- if debug:
- verbose_logger = getLogger("CPAC.engine")
- verbose_logger.debug("linked_resources: %s", linked_resources)
- verbose_logger.debug("resource_list: %s", resource_list)
- for resource in resource_list:
- (
- rp_dct, # <---- rp_dct has the strats/pipe_idxs as the keys on first level, then 'data' and 'json' on each strat level underneath
- fetched_resource,
- ) = self.get(
- resource,
- report_fetched=True,
- optional=True, # oh, and we make the resource fetching in get_strats optional so we can have optional inputs, but they won't be optional in the node block unless we want them to be
- )
- if not rp_dct:
- len_inputs -= 1
- continue
- sub_pool = []
- if debug:
- verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct))
- for strat in rp_dct.keys():
- json_info = self.get_json(fetched_resource, strat)
- cpac_prov = json_info["CpacProvenance"]
- sub_pool.append(cpac_prov)
- if fetched_resource not in variant_pool:
- variant_pool[fetched_resource] = []
- if "CpacVariant" in json_info:
- for key, val in json_info["CpacVariant"].items():
- if val not in variant_pool[fetched_resource]:
- variant_pool[fetched_resource] += val
- variant_pool[fetched_resource].append(f"NO-{val[0]}")
-
- if debug:
- verbose_logger = getLogger("CPAC.engine")
- verbose_logger.debug("%s sub_pool: %s\n", resource, sub_pool)
- total_pool.append(sub_pool)
-
- if not total_pool:
- raise LookupError(
- "\n\n[!] C-PAC says: None of the listed "
- "resources in the node block being connected "
- "exist in the resource pool.\n\nResources:\n"
- "%s\n\n" % resource_list
- )
-
- # TODO: right now total_pool is:
- # TODO: [[[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment], [T1w:anat_ingress,desc-preproc_T1w:anatomical_init]],
- # TODO: [[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment, desc-brain_mask:brain_mask_afni], [T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-brain_mask:brain_mask_afni]]]
-
- # TODO: and the code below thinks total_pool is a list of lists, like [[pipe_idx, pipe_idx], [pipe_idx, pipe_idx, pipe_idx], etc.]
- # TODO: and the actual resource is encoded in the tag: of the last item, every time!
- # keying the strategies to the resources, inverting it
- if len_inputs > 1:
- strats = itertools.product(*total_pool)
-
- # we now currently have "strats", the combined permutations of all the strategies, as a list of tuples, each tuple combining one version of input each, being one of the permutations.
- # OF ALL THE DIFFERENT INPUTS. and they are tagged by their fetched inputs with {name}:{strat}.
- # so, each tuple has ONE STRAT FOR EACH INPUT, so if there are three inputs, each tuple will have 3 items.
- new_strats = {}
-
- # get rid of duplicates - TODO: refactor .product
- strat_str_list = []
- strat_list_list = []
- for strat_tuple in strats:
- strat_list = list(copy.deepcopy(strat_tuple))
- strat_str = str(strat_list)
- if strat_str not in strat_str_list:
- strat_str_list.append(strat_str)
- strat_list_list.append(strat_list)
-
- if debug:
- verbose_logger = getLogger("CPAC.engine")
- verbose_logger.debug("len(strat_list_list): %s\n", len(strat_list_list))
- for strat_list in strat_list_list:
- json_dct = {}
- for strat in strat_list:
- # strat is a prov list for a single resource/input
- strat_resource, strat_idx = self.generate_prov_string(strat)
- strat_json = self.get_json(strat_resource, strat=strat_idx)
- json_dct[strat_resource] = strat_json
-
- drop = False
- if linked_resources:
- for linked in linked_resources: # <--- 'linked' is each tuple
- if drop:
- break
- for xlabel in linked:
- if drop:
- break
- xjson = copy.deepcopy(json_dct[xlabel])
- for ylabel in linked:
- if xlabel == ylabel:
- continue
- yjson = copy.deepcopy(json_dct[ylabel])
-
- if "CpacVariant" not in xjson:
- xjson["CpacVariant"] = {}
- if "CpacVariant" not in yjson:
- yjson["CpacVariant"] = {}
-
- current_strat = []
- for key, val in xjson["CpacVariant"].items():
- if isinstance(val, list):
- current_strat.append(val[0])
- else:
- current_strat.append(val)
- current_spread = list(set(variant_pool[xlabel]))
- for spread_label in current_spread:
- if "NO-" in spread_label:
- continue
- if spread_label not in current_strat:
- current_strat.append(f"NO-{spread_label}")
-
- other_strat = []
- for key, val in yjson["CpacVariant"].items():
- if isinstance(val, list):
- other_strat.append(val[0])
- else:
- other_strat.append(val)
- other_spread = list(set(variant_pool[ylabel]))
- for spread_label in other_spread:
- if "NO-" in spread_label:
- continue
- if spread_label not in other_strat:
- other_strat.append(f"NO-{spread_label}")
-
- for variant in current_spread:
- in_current_strat = False
- in_other_strat = False
- in_other_spread = False
-
- if variant is None:
- in_current_strat = True
- if None in other_spread:
- in_other_strat = True
- if variant in current_strat:
- in_current_strat = True
- if variant in other_strat:
- in_other_strat = True
- if variant in other_spread:
- in_other_spread = True
-
- if not in_other_strat:
- if in_other_spread:
- if in_current_strat:
- drop = True
- break
-
- if in_other_strat:
- if in_other_spread:
- if not in_current_strat:
- drop = True
- break
- if drop:
- break
- if drop:
- continue
-
- # make the merged strat label from the multiple inputs
- # strat_list is actually the merged CpacProvenance lists
- pipe_idx = str(strat_list)
- new_strats[pipe_idx] = ResourcePool()
- # new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS!
- # placing JSON info at one level higher only for copy convenience
- new_strats[pipe_idx].rpool["json"] = {}
- new_strats[pipe_idx].rpool["json"]["subjson"] = {}
- new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = strat_list
-
- # now just invert resource:strat to strat:resource for each resource:strat
- for cpac_prov in strat_list:
- resource, strat = self.generate_prov_string(cpac_prov)
- resource_strat_dct = self.rpool[resource][strat]
- # remember, `resource_strat_dct` is the dct of 'data' and 'json'.
- new_strats[pipe_idx].rpool[resource] = resource_strat_dct
- # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together.
- self.pipe_list.append(pipe_idx)
- if "CpacVariant" in resource_strat_dct["json"]:
- if "CpacVariant" not in new_strats[pipe_idx].rpool["json"]:
- new_strats[pipe_idx].rpool["json"]["CpacVariant"] = {}
- for younger_resource, variant_list in resource_strat_dct[
- "json"
- ]["CpacVariant"].items():
- if (
- younger_resource
- not in new_strats[pipe_idx].rpool["json"]["CpacVariant"]
- ):
- new_strats[pipe_idx].rpool["json"]["CpacVariant"][
- younger_resource
- ] = variant_list
- # preserve each input's JSON info also
- data_type = resource.split("_")[-1]
- if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]:
- new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {}
- new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update(
- copy.deepcopy(resource_strat_dct["json"])
- )
- else:
- new_strats = {}
- for resource_strat_list in total_pool:
- # total_pool will have only one list of strats, for the one input
- for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs
- resource, pipe_idx = self.generate_prov_string(cpac_prov)
- resource_strat_dct = self.rpool[resource][pipe_idx]
- # remember, `resource_strat_dct` is the dct of 'data' and 'json'.
- new_strats[pipe_idx] = ResourcePool(
- rpool={resource: resource_strat_dct}
- ) # <----- again, new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS!
- # placing JSON info at one level higher only for copy convenience
- new_strats[pipe_idx].rpool["json"] = resource_strat_dct["json"]
- # TODO: WARNING- THIS IS A LEVEL HIGHER THAN THE ORIGINAL 'JSON' FOR EASE OF ACCESS IN CONNECT_BLOCK WITH THE .GET(JSON)
- new_strats[pipe_idx].rpool["json"]["subjson"] = {}
- new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = cpac_prov
- # preserve each input's JSON info also
- data_type = resource.split("_")[-1]
- if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]:
- new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {}
- new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update(
- copy.deepcopy(resource_strat_dct["json"])
- )
- return new_strats
-
- def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x):
- if label in self.xfm:
- json_info = dict(json_info)
-
- # get the bold-to-template transform from the current strat_pool info
- xfm_idx = None
- xfm_label = "from-bold_to-template_mode-image_xfm"
- for entry in json_info["CpacProvenance"]:
- if isinstance(entry, list):
- if entry[-1].split(":")[0] == xfm_label:
- xfm_prov = entry
- xfm_idx = self.generate_prov_string(xfm_prov)[1]
- break
-
- # but if the resource doesn't have the bold-to-template transform
- # in its provenance/strategy, find the appropriate one for this
- # current pipe_idx/strat
- if not xfm_idx:
- xfm_info = []
- for pipe_idx, entry in self.get(xfm_label).items():
- xfm_info.append((pipe_idx, entry["json"]["CpacProvenance"]))
- else:
- xfm_info = [(xfm_idx, xfm_prov)]
-
- for num, xfm_entry in enumerate(xfm_info):
- xfm_idx, xfm_prov = xfm_entry
- reg_tool = check_prov_for_regtool(xfm_prov)
-
- xfm = transform_derivative(
- f"{label}_xfm_{pipe_x}_{num}",
- label,
- reg_tool,
- self.num_cpus,
- self.num_ants_cores,
- ants_interp=self.ants_interp,
- fsl_interp=self.fsl_interp,
- opt=None,
- )
- wf.connect(connection[0], connection[1], xfm, "inputspec.in_file")
-
- node, out = self.get_data("T1w-brain-template-deriv", quick_single=True)
- wf.connect(node, out, xfm, "inputspec.reference")
-
- node, out = self.get_data(
- "from-bold_to-template_mode-image_xfm", pipe_idx=xfm_idx
- )
- wf.connect(node, out, xfm, "inputspec.transform")
-
- label = f"space-template_{label}"
- json_info["Template"] = self.get_json_info(
- "T1w-brain-template-deriv", None, "Description"
- )
- new_prov = json_info["CpacProvenance"] + xfm_prov
- json_info["CpacProvenance"] = new_prov
- new_pipe_idx = self.generate_prov_string(new_prov)
- self.set_data(
- label,
- xfm,
- "outputspec.out_file",
- json_info,
- new_pipe_idx,
- f"{label}_xfm_{num}",
- fork=True,
- )
-
- return wf
-
- @property
- def filtered_movement(self) -> bool:
- """
- Check if the movement parameters have been filtered in this strat_pool.
-
- Returns
- -------
- bool
- """
- try:
- return "motion_estimate_filter" in str(
- self.get_cpac_provenance("desc-movementParameters_motion")
- )
- except KeyError:
- # not a strat_pool or no movement parameters in strat_pool
- return False
-
- def filter_name(self, cfg: Configuration) -> str:
- """
- Return the name of the filter for this strategy.
-
- Assumes a strat_pool with filtered movement parameters.
- """
- motion_filters = cfg[
- "functional_preproc",
- "motion_estimates_and_correction",
- "motion_estimate_filter",
- "filters",
- ]
- if len(motion_filters) == 1 and cfg.switch_is_on(
- [
- "functional_preproc",
- "motion_estimates_and_correction",
- "motion_estimate_filter",
- "run",
- ],
- exclusive=True,
- ):
- return motion_filters[0]["Name"]
- try:
- key = "motion"
- sidecar = self.get_json("desc-movementParameters_motion")
- except KeyError:
- sidecar = None
- if sidecar is not None and "CpacVariant" in sidecar:
- if sidecar["CpacVariant"][key]:
- return sidecar["CpacVariant"][key][0][::-1].split("_", 1)[0][::-1]
- return "none"
-
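# Illustrative sketch of the reversed-split expression in `filter_name` above:
# it pulls out the text after the last underscore of a CpacVariant entry.
# The entry value is hypothetical.
variant_entry = "motion_estimate_filter_lowpass"
assert variant_entry[::-1].split("_", 1)[0][::-1] == "lowpass"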
- def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs):
- input_type = "func_derivative"
-
- post_labels = [(label, connection[0], connection[1])]
-
- if re.match(r"(.*_)?[ed]c[bw]$", label) or re.match(r"(.*_)?lfcd[bw]$", label):
- # suffix: [eigenvector or degree] centrality [binarized or weighted]
- # or lfcd [binarized or weighted]
- mask = "template-specification-file"
- elif "space-template" in label:
- if "space-template_res-derivative_desc-bold_mask" in self.rpool.keys():
- mask = "space-template_res-derivative_desc-bold_mask"
- else:
- mask = "space-template_desc-bold_mask"
- else:
- mask = "space-bold_desc-brain_mask"
-
- mask_idx = None
- for entry in json_info["CpacProvenance"]:
- if isinstance(entry, list):
- if entry[-1].split(":")[0] == mask:
- mask_prov = entry
- mask_idx = self.generate_prov_string(mask_prov)[1]
- break
-
- if self.smoothing_bool:
- if label in Outputs.to_smooth:
- for smooth_opt in self.smooth_opts:
- sm = spatial_smoothing(
- f"{label}_smooth_{smooth_opt}_{pipe_x}",
- self.fwhm,
- input_type,
- smooth_opt,
- )
- wf.connect(connection[0], connection[1], sm, "inputspec.in_file")
- node, out = self.get_data(
- mask, pipe_idx=mask_idx, quick_single=mask_idx is None
- )
- wf.connect(node, out, sm, "inputspec.mask")
-
- if "desc-" not in label:
- if "space-" in label:
- for tag in label.split("_"):
- if "space-" in tag:
- smlabel = label.replace(tag, f"{tag}_desc-sm")
- break
- else:
- smlabel = f"desc-sm_{label}"
- else:
- for tag in label.split("_"):
- if "desc-" in tag:
- newtag = f"{tag}-sm"
- smlabel = label.replace(tag, newtag)
- break
-
- post_labels.append((smlabel, sm, "outputspec.out_file"))
-
- self.set_data(
- smlabel,
- sm,
- "outputspec.out_file",
- json_info,
- pipe_idx,
- f"spatial_smoothing_{smooth_opt}",
- fork=True,
- )
- self.set_data(
- "fwhm",
- sm,
- "outputspec.fwhm",
- json_info,
- pipe_idx,
- f"spatial_smoothing_{smooth_opt}",
- fork=True,
- )
-
- if self.zscoring_bool:
- for label_con_tpl in post_labels:
- label = label_con_tpl[0]
- connection = (label_con_tpl[1], label_con_tpl[2])
- if label in Outputs.to_zstd:
- zstd = z_score_standardize(f"{label}_zstd_{pipe_x}", input_type)
-
- wf.connect(connection[0], connection[1], zstd, "inputspec.in_file")
-
- node, out = self.get_data(mask, pipe_idx=mask_idx)
- wf.connect(node, out, zstd, "inputspec.mask")
-
- if "desc-" not in label:
- if "space-template" in label:
- new_label = label.replace(
- "space-template", "space-template_desc-zstd"
- )
- else:
- new_label = f"desc-zstd_{label}"
- else:
- for tag in label.split("_"):
- if "desc-" in tag:
- newtag = f"{tag}-zstd"
- new_label = label.replace(tag, newtag)
- break
-
- post_labels.append((new_label, zstd, "outputspec.out_file"))
-
- self.set_data(
- new_label,
- zstd,
- "outputspec.out_file",
- json_info,
- pipe_idx,
- "zscore_standardize",
- fork=True,
- )
-
- elif label in Outputs.to_fisherz:
- zstd = fisher_z_score_standardize(
- f"{label}_zstd_{pipe_x}", label, input_type
- )
-
- wf.connect(
- connection[0], connection[1], zstd, "inputspec.correlation_file"
- )
-
- # if the output is 'space-template_desc-MeanSCA_correlations', we want 'desc-MeanSCA_timeseries'
- oned = label.replace("correlations", "timeseries")
-
- node, out = outs[oned]
- wf.connect(node, out, zstd, "inputspec.timeseries_oned")
-
- post_labels.append((new_label, zstd, "outputspec.out_file"))
-
- self.set_data(
- new_label,
- zstd,
- "outputspec.out_file",
- json_info,
- pipe_idx,
- "fisher_zscore_standardize",
- fork=True,
- )
-
- return (wf, post_labels)
-
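# Quick illustration of the label patterns matched at the top of `post_process`
# above, using hypothetical labels.
import re
assert re.match(r"(.*_)?[ed]c[bw]$", "space-template_dcb")    # degree centrality, binarized
assert re.match(r"(.*_)?lfcd[bw]$", "space-template_lfcdw")   # lfcd, weighted
assert not re.match(r"(.*_)?[ed]c[bw]$", "desc-preproc_bold")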
- def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None):
- excl = []
- substring_excl = []
- outputs_logger = getLogger(f'{cfg["subject_id"]}_expectedOutputs')
- expected_outputs = ExpectedOutputs()
-
- if add_excl:
- excl += add_excl
-
- if "nonsmoothed" not in cfg.post_processing["spatial_smoothing"]["output"]:
- excl += Outputs.native_nonsmooth
- excl += Outputs.template_nonsmooth
-
- if "raw" not in cfg.post_processing["z-scoring"]["output"]:
- excl += Outputs.native_raw
- excl += Outputs.template_raw
-
- if not cfg.pipeline_setup["output_directory"]["write_debugging_outputs"]:
- # substring_excl.append(['bold'])
- excl += Outputs.debugging
-
- for resource in self.rpool.keys():
- if resource not in Outputs.any:
- continue
-
- if resource in excl:
- continue
-
- drop = False
- for substring_list in substring_excl:
- bool_list = []
- for substring in substring_list:
- if substring in resource:
- bool_list.append(True)
- else:
- bool_list.append(False)
- for item in bool_list:
- if not item:
- break
- else:
- drop = True
- if drop:
- break
- if drop:
- continue
-
- subdir = "other"
- if resource in Outputs.anat:
- subdir = "anat"
- # TODO: get acq- etc.
- elif resource in Outputs.func:
- subdir = "func"
- # TODO: other stuff like acq- etc.
-
- for pipe_idx in self.rpool[resource]:
- unique_id = self.get_name()
- part_id = unique_id.split("_")[0]
- ses_id = unique_id.split("_")[1]
-
- if "ses-" not in ses_id:
- ses_id = f"ses-{ses_id}"
-
- out_dir = cfg.pipeline_setup["output_directory"]["path"]
- pipe_name = cfg.pipeline_setup["pipeline_name"]
- container = os.path.join(f"pipeline_{pipe_name}", part_id, ses_id)
- filename = f"{unique_id}_{res_in_filename(self.cfg, resource)}"
-
- out_path = os.path.join(out_dir, container, subdir, filename)
-
- out_dct = {
- "unique_id": unique_id,
- "out_dir": out_dir,
- "container": container,
- "subdir": subdir,
- "filename": filename,
- "out_path": out_path,
- }
- self.rpool[resource][pipe_idx]["out"] = out_dct
-
- # TODO: have to link the pipe_idx's here. and call up 'desc-preproc_T1w' from a Sources in a json and replace. here.
- # TODO: can do the pipeline_description.json variants here too!
-
- for resource in self.rpool.keys():
- if resource not in Outputs.any:
- continue
-
- if resource in excl:
- continue
-
- drop = False
- for substring_list in substring_excl:
- bool_list = []
- for substring in substring_list:
- if substring in resource:
- bool_list.append(True)
- else:
- bool_list.append(False)
- for item in bool_list:
- if not item:
- break
- else:
- drop = True
- if drop:
- break
- if drop:
- continue
-
- num_variant = 0
- if len(self.rpool[resource]) == 1:
- num_variant = ""
- all_jsons = [
- self.rpool[resource][pipe_idx]["json"]
- for pipe_idx in self.rpool[resource]
- ]
- unlabelled = {
- key
- for json_info in all_jsons
- for key in json_info.get("CpacVariant", {}).keys()
- if key not in (*MOVEMENT_FILTER_KEYS, "regressors")
- }
- if "bold" in unlabelled:
- all_bolds = list(
- chain.from_iterable(
- json_info["CpacVariant"]["bold"]
- for json_info in all_jsons
- if "CpacVariant" in json_info
- and "bold" in json_info["CpacVariant"]
- )
- )
- # `not any(not ...)` instead of `all(...)` because `all` is shadowed by a parameter here
- if not any(
- not re.match(
- r"apply_(phasediff|blip)_to_timeseries_separately_.*", _bold
- )
- for _bold in all_bolds
- ):
- # this fork point should only result in 0 or 1 forks
- unlabelled.remove("bold")
- del all_bolds
- all_forks = {
- key: set(
- chain.from_iterable(
- json_info["CpacVariant"][key]
- for json_info in all_jsons
- if "CpacVariant" in json_info
- and key in json_info["CpacVariant"]
- )
- )
- for key in unlabelled
- }
- # del all_jsons
- for key, forks in all_forks.items():
- if len(forks) < 2: # noqa: PLR2004
- # no int suffix needed if only one fork
- unlabelled.remove(key)
- # del all_forks
- for pipe_idx in self.rpool[resource]:
- pipe_x = self.get_pipe_number(pipe_idx)
- json_info = self.rpool[resource][pipe_idx]["json"]
- out_dct = self.rpool[resource][pipe_idx]["out"]
-
- try:
- if unlabelled:
- num_variant += 1
- except TypeError:
- pass
-
- try:
- del json_info["subjson"]
- except KeyError:
- pass
-
- if out_dct["subdir"] == "other" and not all:
- continue
-
- unique_id = out_dct["unique_id"]
- resource_idx = resource
-
- if isinstance(num_variant, int):
- resource_idx, out_dct = name_fork(
- resource_idx, cfg, json_info, out_dct
- )
- if unlabelled:
- if "desc-" in out_dct["filename"]:
- for key in out_dct["filename"].split("_")[::-1]:
- # final `desc` entity
- if key.startswith("desc-"):
- out_dct["filename"] = out_dct["filename"].replace(
- key, f"{key}-{num_variant}"
- )
- resource_idx = resource_idx.replace(
- key, f"{key}-{num_variant}"
- )
- break
- else:
- suff = resource.split("_")[-1]
- newdesc_suff = f"desc-{num_variant}_{suff}"
- resource_idx = resource_idx.replace(suff, newdesc_suff)
- id_string = pe.Node(
- Function(
- input_names=[
- "cfg",
- "unique_id",
- "resource",
- "scan_id",
- "template_desc",
- "atlas_id",
- "fwhm",
- "subdir",
- "extension",
- ],
- output_names=["out_filename"],
- function=create_id_string,
- ),
- name=f"id_string_{resource_idx}_{pipe_x}",
- )
- id_string.inputs.cfg = self.cfg
- id_string.inputs.unique_id = unique_id
- id_string.inputs.resource = resource_idx
- id_string.inputs.subdir = out_dct["subdir"]
-
- # grab the iterable scan ID
- if out_dct["subdir"] == "func":
- node, out = self.rpool["scan"]["['scan:func_ingress']"]["data"]
- wf.connect(node, out, id_string, "scan_id")
-
- self.back_propogate_template_name(
- wf, resource_idx, json_info, id_string
- )
- # grab the FWHM if smoothed
- for tag in resource.split("_"):
- if "desc-" in tag and "-sm" in tag:
- fwhm_idx = pipe_idx.replace(f"{resource}:", "fwhm:")
- try:
- node, out = self.rpool["fwhm"][fwhm_idx]["data"]
- wf.connect(node, out, id_string, "fwhm")
- except KeyError:
- # smoothing was not done for this resource in the
- # engine.py smoothing
- pass
- break
- atlas_suffixes = ["timeseries", "correlations", "statmap"]
- # grab the iterable atlas ID
- atlas_id = None
- if not resource.endswith("desc-confounds_timeseries"):
- if resource.split("_")[-1] in atlas_suffixes:
- atlas_idx = pipe_idx.replace(resource, "atlas_name")
- # need the single quote and the colon inside the double
- # quotes - it's the encoded pipe_idx
- # atlas_idx = new_idx.replace(f"'{temp_rsc}:",
- # "'atlas_name:")
- if atlas_idx in self.rpool["atlas_name"]:
- node, out = self.rpool["atlas_name"][atlas_idx]["data"]
- wf.connect(node, out, id_string, "atlas_id")
- elif "atlas-" in resource:
- for tag in resource.split("_"):
- if "atlas-" in tag:
- atlas_id = tag.replace("atlas-", "")
- id_string.inputs.atlas_id = atlas_id
- else:
- warnings.warn(
- str(
- LookupError(
- "\n[!] No atlas ID found for "
- f"{out_dct['filename']}.\n"
- )
- )
- )
- nii_name = pe.Node(Rename(), name=f"nii_{resource_idx}_{pipe_x}")
- nii_name.inputs.keep_ext = True
-
- if resource in Outputs.ciftis:
- nii_name.inputs.keep_ext = False
- id_string.inputs.extension = Outputs.ciftis[resource]
- else:
- nii_name.inputs.keep_ext = True
-
- if resource in Outputs.giftis:
- nii_name.inputs.keep_ext = False
- id_string.inputs.extension = f"{Outputs.giftis[resource]}.gii"
-
- else:
- nii_name.inputs.keep_ext = True
-
- wf.connect(id_string, "out_filename", nii_name, "format_string")
-
- node, out = self.rpool[resource][pipe_idx]["data"]
- try:
- wf.connect(node, out, nii_name, "in_file")
- except OSError as os_error:
- WFLOGGER.warning(os_error)
- continue
-
- write_json_imports = ["import os", "import json"]
- write_json = pe.Node(
- Function(
- input_names=["json_data", "filename"],
- output_names=["json_file"],
- function=write_output_json,
- imports=write_json_imports,
- ),
- name=f"json_{resource_idx}_{pipe_x}",
- )
- write_json.inputs.json_data = json_info
-
- wf.connect(id_string, "out_filename", write_json, "filename")
- ds = pe.Node(DataSink(), name=f"sinker_{resource_idx}_{pipe_x}")
- ds.inputs.parameterization = False
- ds.inputs.base_directory = out_dct["out_dir"]
- ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][
- "s3_encryption"
- ]
- ds.inputs.container = out_dct["container"]
-
- if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]:
- ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][
- "aws_output_bucket_credentials"
- ]
- expected_outputs += (
- out_dct["subdir"],
- create_id_string(
- self.cfg,
- unique_id,
- resource_idx,
- template_desc=id_string.inputs.template_desc,
- atlas_id=atlas_id,
- subdir=out_dct["subdir"],
- ),
- )
- wf.connect(nii_name, "out_file", ds, f'{out_dct["subdir"]}.@data')
- wf.connect(write_json, "json_file", ds, f'{out_dct["subdir"]}.@json')
- outputs_logger.info(expected_outputs)
-
- def node_data(self, resource, **kwargs):
- """Create NodeData objects.
-
- Parameters
- ----------
- resource : str
-
- Returns
- -------
- NodeData
- """
- return NodeData(self, resource, **kwargs)
-
-
-class NodeBlock:
- def __init__(self, node_block_functions, debug=False):
- if not isinstance(node_block_functions, list):
- node_block_functions = [node_block_functions]
-
- self.node_blocks = {}
-
- for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking.
- self.input_interface = []
- if isinstance(node_block_function, tuple):
- self.input_interface = node_block_function[1]
- node_block_function = node_block_function[0]
- if not isinstance(self.input_interface, list):
- self.input_interface = [self.input_interface]
-
- if not isinstance(node_block_function, NodeBlockFunction):
- # If the object is a plain function, `__name__` will be more useful than `str()`
- obj_str = (
- node_block_function.__name__
- if hasattr(node_block_function, "__name__")
- else str(node_block_function)
- )
- msg = f'Object is not a nodeblock: "{obj_str}"'
- raise TypeError(msg)
-
- name = node_block_function.name
- self.name = name
- self.node_blocks[name] = {}
-
- if self.input_interface:
- for interface in self.input_interface:
- for orig_input in node_block_function.inputs:
- if isinstance(orig_input, tuple):
- list_tup = list(orig_input)
- if interface[0] in list_tup:
- list_tup.remove(interface[0])
- list_tup.append(interface[1])
- node_block_function.inputs.remove(orig_input)
- node_block_function.inputs.append(tuple(list_tup))
- elif orig_input == interface[0]:
- node_block_function.inputs.remove(interface[0])
- node_block_function.inputs.append(interface[1])
-
- for key, val in node_block_function.legacy_nodeblock_dict().items():
- self.node_blocks[name][key] = val
-
- self.node_blocks[name]["block_function"] = node_block_function
-
- # TODO: fix/replace below
- self.outputs = {}
- for out in node_block_function.outputs:
- self.outputs[out] = None
-
- self.options = ["base"]
- if node_block_function.outputs is not None:
- self.options = node_block_function.outputs
-
- WFLOGGER.info("Connecting %s...", name)
- if debug:
- config.update_config({"logging": {"workflow_level": "DEBUG"}})
- logging.update_logging(config)
- WFLOGGER.debug(
- '"inputs": %s\n\t "outputs": %s%s',
- node_block_function.inputs,
- list(self.outputs.keys()),
- f'\n\t"options": {self.options}'
- if self.options != ["base"]
- else "",
- )
- config.update_config({"logging": {"workflow_level": "INFO"}})
- logging.update_logging(config)
-
- def get_name(self):
- return self.name
-
- def check_null(self, val):
- if isinstance(val, str):
- val = None if val.lower() == "none" else val
- return val
-
- def check_output(self, outputs, label, name):
- if label not in outputs:
- msg = (
- f'\n[!] Output name "{label}" in the block '
- "function does not match the outputs list "
- f'{outputs} in Node Block "{name}"\n'
- )
- raise NameError(msg)
-
- def grab_tiered_dct(self, cfg, key_list):
- cfg_dct = cfg.dict()
- for key in key_list:
- try:
- cfg_dct = cfg_dct.get(key, {})
- except KeyError as ke:
- msg = "[!] The config provided to the node block is not valid"
- raise KeyError(msg) from ke
- return cfg_dct
-
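# Illustrative sketch of what `grab_tiered_dct` above does: walk nested
# pipeline-config keys, returning {} for missing levels. The config dict and
# key list are hypothetical.
hypothetical_cfg_dct = {"functional_preproc": {"motion_estimate_filter": {"run": [True]}}}
level = hypothetical_cfg_dct
for key in ["functional_preproc", "motion_estimate_filter", "run"]:
    level = level.get(key, {})
assert level == [True]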
- def connect_block(self, wf, cfg, rpool):
- debug = cfg.pipeline_setup["Debugging"]["verbose"]
- all_opts = []
- for name, block_dct in self.node_blocks.items():
- opts = []
- config = self.check_null(block_dct["config"])
- option_key = self.check_null(block_dct["option_key"])
- option_val = self.check_null(block_dct["option_val"])
- if option_key and option_val:
- if not isinstance(option_key, list):
- option_key = [option_key]
- if not isinstance(option_val, list):
- option_val = [option_val]
- if config:
- key_list = config + option_key
- else:
- key_list = option_key
- if "USER-DEFINED" in option_val:
- # load custom config data into each 'opt'
- opts = self.grab_tiered_dct(cfg, key_list)
- else:
- for option in option_val:
- try:
- if option in self.grab_tiered_dct(cfg, key_list):
- # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list
- opts.append(option)
- except AttributeError as err:
- msg = f"{err}\nNode Block: {name}"
- raise Exception(msg)
-
- if opts is None:
- opts = [opts]
-
- elif option_key and not option_val:
- # enables multiple config forking entries
- if not isinstance(option_key[0], list):
- msg = (
- f"[!] The option_key field ({option_key}) "
- f"for {name} exists but there is no "
- "option_val.\n\nIf you are trying to "
- "populate multiple option keys, the "
- "option_val field must contain a list of "
- "a list.\n"
- )
- raise ValueError(msg)
- for option_config in option_key:
- # option_config is a list of pipe config levels down to the option
- if config:
- key_list = config + option_config
- else:
- key_list = option_config
- option_val = option_config[-1]
- if option_val in self.grab_tiered_dct(cfg, key_list[:-1]):
- opts.append(option_val)
- else: # If there are multiple option_vals (in a list) in the docstring, they get iterated below in 'for opt in opts'; that is when the node block code itself must delineate between options.
- opts = [None]
- all_opts += opts
-
- sidecar_additions = {
- "CpacConfigHash": hashlib.sha1(
- json.dumps(cfg.dict(), sort_keys=True).encode("utf-8")
- ).hexdigest(),
- "CpacConfig": cfg.dict(),
- }
-
- if cfg["pipeline_setup"]["output_directory"].get("user_defined"):
- sidecar_additions["UserDefined"] = cfg["pipeline_setup"][
- "output_directory"
- ]["user_defined"]
-
- for name, block_dct in self.node_blocks.items():
- # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking.
- switch = self.check_null(block_dct["switch"])
- config = self.check_null(block_dct["config"])
- option_key = self.check_null(block_dct["option_key"])
- option_val = self.check_null(block_dct["option_val"])
- inputs = self.check_null(block_dct["inputs"])
- outputs = self.check_null(block_dct["outputs"])
-
- block_function = block_dct["block_function"]
-
- opts = []
- if option_key and option_val:
- if not isinstance(option_key, list):
- option_key = [option_key]
- if not isinstance(option_val, list):
- option_val = [option_val]
- if config:
- key_list = config + option_key
- else:
- key_list = option_key
- if "USER-DEFINED" in option_val:
- # load custom config data into each 'opt'
- opts = self.grab_tiered_dct(cfg, key_list)
- else:
- for option in option_val:
- if option in self.grab_tiered_dct(cfg, key_list):
- # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list
- opts.append(option)
- else: # If there are multiple option_vals (in a list) in the docstring, they get iterated below in 'for opt in opts'; that is when the node block code itself must delineate between options.
- opts = [None]
- # This also means the multiple option_vals in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples.
- if not opts:
- # for node blocks where the options are split into different
- # block functions - opts will be empty for non-selected
- # options, and would waste the get_strats effort below
- continue
-
- if not switch:
- switch = [True]
- else:
- if config:
- try:
- key_list = config + switch
- except TypeError as te:
- msg = (
- "\n\n[!] Developer info: Docstring error "
- f"for {name}, make sure the 'config' or "
- "'switch' fields are lists.\n\n"
- )
- raise TypeError(msg) from te
- switch = self.grab_tiered_dct(cfg, key_list)
- elif isinstance(switch[0], list):
- # we have multiple switches, which is designed to work only if
- # config is set to "None"
- switch_list = []
- for key_list in switch:
- val = self.grab_tiered_dct(cfg, key_list)
- if isinstance(val, list):
- # fork switches
- if True in val:
- switch_list.append(True)
- if False in val:
- switch_list.append(False)
- else:
- switch_list.append(val)
- if False in switch_list:
- switch = [False]
- else:
- switch = [True]
- else:
- # if config is set to "None"
- key_list = switch
- switch = self.grab_tiered_dct(cfg, key_list)
- if not isinstance(switch, list):
- switch = [switch]
- if True in switch:
- for (
- pipe_idx,
- strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.}
- ) in rpool.get_strats(inputs, debug).items():
- # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} }
- fork = False in switch
- for opt in opts: # it's a dictionary of ResourcePools called strat_pools, except those sub-ResourcePools only have one level! no pipe_idx strat keys.
- # remember, you can get 'data' or 'json' from strat_pool with member functions
- # strat_pool has all of the JSON information of all the inputs!
- # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output.
- # particularly, our custom 'CpacProvenance' field.
- node_name = name
- pipe_x = rpool.get_pipe_number(pipe_idx)
-
- replaced_inputs = []
- for interface in self.input_interface:
- if isinstance(interface[1], list):
- for input_name in interface[1]:
- if strat_pool.check_rpool(input_name):
- break
- else:
- input_name = interface[1]
- strat_pool.copy_resource(input_name, interface[0])
- replaced_inputs.append(interface[0])
- try:
- wf, outs = block_function(wf, cfg, strat_pool, pipe_x, opt)
- except IOError as e: # duplicate node
- WFLOGGER.warning(e)
- continue
-
- if not outs:
- if block_function.__name__ == "freesurfer_postproc":
- WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA)
- LOGTAIL["warnings"].append(
- WARNING_FREESURFER_OFF_WITH_DATA
- )
- continue
-
- if opt and len(option_val) > 1:
- node_name = f"{node_name}_{opt}"
- elif opt and "USER-DEFINED" in option_val:
- node_name = f'{node_name}_{opt["Name"]}'
-
- if debug:
- verbose_logger = getLogger("CPAC.engine")
- verbose_logger.debug("\n=======================")
- verbose_logger.debug("Node name: %s", node_name)
- prov_dct = rpool.get_resource_strats_from_prov(
- ast.literal_eval(pipe_idx)
- )
- for key, val in prov_dct.items():
- verbose_logger.debug("-------------------")
- verbose_logger.debug("Input - %s:", key)
- sub_prov_dct = rpool.get_resource_strats_from_prov(val)
- for sub_key, sub_val in sub_prov_dct.items():
- sub_sub_dct = rpool.get_resource_strats_from_prov(
- sub_val
- )
- verbose_logger.debug(" sub-input - %s:", sub_key)
- verbose_logger.debug(" prov = %s", sub_val)
- verbose_logger.debug(
- " sub_sub_inputs = %s", sub_sub_dct.keys()
- )
-
- for label, connection in outs.items():
- self.check_output(outputs, label, name)
- new_json_info = copy.deepcopy(strat_pool.get("json"))
-
- # transfer over data-specific json info
- # for example, if the input data json is _bold and the output is also _bold
- data_type = label.split("_")[-1]
- if data_type in new_json_info["subjson"]:
- if (
- "SkullStripped"
- in new_json_info["subjson"][data_type]
- ):
- new_json_info["SkullStripped"] = new_json_info[
- "subjson"
- ][data_type]["SkullStripped"]
-
- # determine sources for the outputs, i.e. all input data into the node block
- new_json_info["Sources"] = [
- x
- for x in strat_pool.get_entire_rpool()
- if x != "json" and x not in replaced_inputs
- ]
-
- if isinstance(outputs, dict):
- new_json_info.update(outputs[label])
- if "Description" not in outputs[label]:
- # don't propagate old Description
- try:
- del new_json_info["Description"]
- except KeyError:
- pass
- if "Template" in outputs[label]:
- template_key = outputs[label]["Template"]
- if template_key in new_json_info["Sources"]:
- # only if the pipeline config template key is entered as the 'Template' field
- # otherwise, skip this and take in the literal 'Template' string
- try:
- new_json_info["Template"] = new_json_info[
- "subjson"
- ][template_key]["Description"]
- except KeyError:
- pass
- try:
- new_json_info["Resolution"] = new_json_info[
- "subjson"
- ][template_key]["Resolution"]
- except KeyError:
- pass
- else:
- # don't propagate old Description
- try:
- del new_json_info["Description"]
- except KeyError:
- pass
-
- if "Description" in new_json_info:
- new_json_info["Description"] = " ".join(
- new_json_info["Description"].split()
- )
-
- for sidecar_key, sidecar_value in sidecar_additions.items():
- if sidecar_key not in new_json_info:
- new_json_info[sidecar_key] = sidecar_value
-
- try:
- del new_json_info["subjson"]
- except KeyError:
- pass
-
- if fork or len(opts) > 1 or len(all_opts) > 1:
- if "CpacVariant" not in new_json_info:
- new_json_info["CpacVariant"] = {}
- raw_label = rpool.get_raw_label(label)
- if raw_label not in new_json_info["CpacVariant"]:
- new_json_info["CpacVariant"][raw_label] = []
- new_json_info["CpacVariant"][raw_label].append(
- node_name
- )
-
- rpool.set_data(
- label,
- connection[0],
- connection[1],
- new_json_info,
- pipe_idx,
- node_name,
- fork,
- )
-
- wf, post_labels = rpool.post_process(
- wf,
- label,
- connection,
- new_json_info,
- pipe_idx,
- pipe_x,
- outs,
- )
-
- if rpool.func_reg:
- for postlabel in post_labels:
- connection = (postlabel[1], postlabel[2])
- wf = rpool.derivative_xfm(
- wf,
- postlabel[0],
- connection,
- new_json_info,
- pipe_idx,
- pipe_x,
- )
- return wf
-
-
-def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt):
- """Wrap a list of node block functions to use within other node blocks.
-
- Example usage:
-
- # This calls the 'bold_mask_afni' and 'bold_masking' node blocks to
- # skull-strip an EPI field map, without having to invoke the NodeBlock
- # connection system.
-
- # The interface dictionary tells wrap_block to set the EPI field map
- # in the parent node block's throw-away strat_pool as 'bold', so that
- # the 'bold_mask_afni' and 'bold_masking' node blocks will see that as
- # the 'bold' input.
-
- # It also tells wrap_block to set the 'desc-brain_bold' output of
- # the 'bold_masking' node block to 'opposite_pe_epi_brain' (what it
- # actually is) in the parent node block's strat_pool, which gets
- # returned.
-
- # Note 'bold' and 'desc-brain_bold' (all on the left side) are the
- # labels that 'bold_mask_afni' and 'bold_masking' understand/expect
- # through their interfaces and docstrings.
-
- # The right-hand side (the values of the 'interface' dictionary) are
- # what 'make sense' within the current parent node block - in this
- # case, the distortion correction node block dealing with field maps.
-
- interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'),
- 'desc-brain_bold': 'opposite_pe_epi_brain'}
- wf, strat_pool = wrap_block([bold_mask_afni, bold_masking],
- interface, wf, cfg, strat_pool,
- pipe_num, opt)
-
- ...further downstream in the parent node block:
-
- node, out = strat_pool.get_data('opposite_pe_epi_brain')
-
- # The above line will connect the output of the 'bold_masking' node
- # block (which is the skull-stripped version of 'opposite_pe_epi') to
- # the next node.
-
- """
- for block in node_blocks:
- # new_pool = copy.deepcopy(strat_pool)
- for in_resource, val in interface.items():
- if isinstance(val, tuple):
- strat_pool.set_data(
- in_resource, val[0], val[1], {}, "", "", fork=True
- ) #
- if "sub_num" not in strat_pool.get_pool_info():
- strat_pool.set_pool_info({"sub_num": 0})
- sub_num = strat_pool.get_pool_info()["sub_num"]
-
- wf, outputs = block(wf, cfg, strat_pool, f"{pipe_num}-{sub_num}", opt) #
- for out, val in outputs.items():
- if out in interface and isinstance(interface[out], str):
- strat_pool.set_data(
- interface[out], outputs[out][0], outputs[out][1], {}, "", ""
- )
- else:
- strat_pool.set_data(out, outputs[out][0], outputs[out][1], {}, "", "")
- sub_num += 1
- strat_pool.set_pool_info({"sub_num": sub_num})
-
- return (wf, strat_pool)
-
-
-def ingress_raw_anat_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id):
- if "anat" not in data_paths:
- WFLOGGER.warning("No anatomical data present.")
- return rpool
-
- if "creds_path" not in data_paths:
- data_paths["creds_path"] = None
-
- anat_flow = create_anat_datasource(f"anat_T1w_gather_{part_id}_{ses_id}")
-
- anat = {}
- if isinstance(data_paths["anat"], str):
- anat["T1"] = data_paths["anat"]
- elif "T1w" in data_paths["anat"]:
- anat["T1"] = data_paths["anat"]["T1w"]
-
- if "T1" in anat:
- anat_flow.inputs.inputnode.set(
- subject=part_id,
- anat=anat["T1"],
- creds_path=data_paths["creds_path"],
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- img_type="anat",
- )
- rpool.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress")
-
- if "T2w" in data_paths["anat"]:
- anat_flow_T2 = create_anat_datasource(f"anat_T2w_gather_{part_id}_{ses_id}")
- anat_flow_T2.inputs.inputnode.set(
- subject=part_id,
- anat=data_paths["anat"]["T2w"],
- creds_path=data_paths["creds_path"],
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- img_type="anat",
- )
- rpool.set_data("T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress")
-
- if cfg.surface_analysis["freesurfer"]["ingress_reconall"]:
- rpool = ingress_freesurfer(
- wf, rpool, cfg, data_paths, unique_id, part_id, ses_id
- )
-
- return rpool
-
-
-def ingress_freesurfer(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id):
- try:
- fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], part_id)
- except KeyError:
- WFLOGGER.warning("No FreeSurfer data present.")
- return rpool
-
- # fs_path = os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id)
- if not os.path.exists(fs_path):
- if "sub" in part_id:
- fs_path = os.path.join(
- cfg.pipeline_setup["freesurfer_dir"], part_id.replace("sub-", "")
- )
- else:
- fs_path = os.path.join(
- cfg.pipeline_setup["freesurfer_dir"], ("sub-" + part_id)
- )
-
- # patch for flo-specific data
- if not os.path.exists(fs_path):
- subj_ses = part_id + "-" + ses_id
- fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], subj_ses)
- if not os.path.exists(fs_path):
- WFLOGGER.info("No FreeSurfer data found for subject %s", part_id)
- return rpool
-
- # Check for double nested subj names
- if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))):
- fs_path = os.path.join(fs_path, part_id)
-
- fs_ingress = create_general_datasource("gather_freesurfer_dir")
- fs_ingress.inputs.inputnode.set(
- unique_id=unique_id,
- data=fs_path,
- creds_path=data_paths["creds_path"],
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- )
- rpool.set_data(
- "freesurfer-subject-dir",
- fs_ingress,
- "outputspec.data",
- {},
- "",
- "freesurfer_config_ingress",
- )
-
- recon_outs = {
- "pipeline-fs_raw-average": "mri/rawavg.mgz",
- "pipeline-fs_subcortical-seg": "mri/aseg.mgz",
- "pipeline-fs_brainmask": "mri/brainmask.mgz",
- "pipeline-fs_wmparc": "mri/wmparc.mgz",
- "pipeline-fs_T1": "mri/T1.mgz",
- "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv",
- "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv",
- "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial",
- "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial",
- "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm",
- "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm",
- "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere",
- "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere",
- "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc",
- "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc",
- "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness",
- "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness",
- "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume",
- "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume",
- "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white",
- "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white",
- "pipeline-fs_xfm": "mri/transforms/talairach.lta",
- }
-
- for key, outfile in recon_outs.items():
- fullpath = os.path.join(fs_path, outfile)
- if os.path.exists(fullpath):
- fs_ingress = create_general_datasource(f"gather_fs_{key}_dir")
- fs_ingress.inputs.inputnode.set(
- unique_id=unique_id,
- data=fullpath,
- creds_path=data_paths["creds_path"],
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- )
- rpool.set_data(
- key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress"
- )
- else:
- warnings.warn(
- str(LookupError(f"\n[!] Path does not exist for {fullpath}.\n"))
- )
-
- return rpool
-
-
-def ingress_raw_func_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id):
- func_paths_dct = data_paths["func"]
-
- func_wf = create_func_datasource(
- func_paths_dct, rpool, f"func_ingress_{part_id}_{ses_id}"
- )
- func_wf.inputs.inputnode.set(
- subject=part_id,
- creds_path=data_paths["creds_path"],
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- )
- func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys()))
-
- rpool.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress")
- rpool.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress")
- rpool.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress")
- rpool.set_data(
- "scan-params", func_wf, "outputspec.scan_params", {}, "", "scan_params_ingress"
- )
-
- # TODO: CHECK FOR PARAMETERS
-
- wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata(
- wf, cfg, rpool, data_paths, part_id, data_paths["creds_path"], ses_id
- )
-
- # Memoize list of local functional scans
- # TODO: handle S3 files
- # Skip S3 files for now
-
- local_func_scans = [
- func_paths_dct[scan]["scan"]
- for scan in func_paths_dct.keys()
- if not func_paths_dct[scan]["scan"].startswith("s3://")
- ]
- if local_func_scans:
- # pylint: disable=protected-access
- wf._local_func_scans = local_func_scans
- if cfg.pipeline_setup["Debugging"]["verbose"]:
- verbose_logger = getLogger("CPAC.engine")
- verbose_logger.debug("local_func_scans: %s", local_func_scans)
- del local_func_scans
-
- return (wf, rpool, diff, blip, fmap_rp_list)
-
-
-def ingress_output_dir(
- wf, cfg, rpool, unique_id, data_paths, part_id, ses_id, creds_path=None
-):
- dir_path = data_paths["derivatives_dir"]
-
- WFLOGGER.info("\nPulling outputs from %s.\n", dir_path)
-
- anat = os.path.join(dir_path, "anat")
- func = os.path.join(dir_path, "func")
-
- exts = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"]
-
- outdir_anat = []
- outdir_func = []
- func_paths = {}
- func_dict = {}
-
- for subdir in [anat, func]:
- if os.path.isdir(subdir):
- for filename in os.listdir(subdir):
- for ext in exts:
- if ext in filename:
- if subdir == anat:
- outdir_anat.append(os.path.join(subdir, filename))
- else:
- outdir_func.append(os.path.join(subdir, filename))
-
- # Add derivatives directory to rpool
- ingress = create_general_datasource("gather_derivatives_dir")
- ingress.inputs.inputnode.set(
- unique_id=unique_id,
- data=dir_path,
- creds_path=creds_path,
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- )
- rpool.set_data(
- "derivatives-dir", ingress, "outputspec.data", {}, "", "outdir_config_ingress"
- )
-
- for subdir in [outdir_anat, outdir_func]:
- for filepath in subdir:
- filename = str(filepath)
- for ext in exts:
- filename = filename.split("/")[-1].replace(ext, "")
-
- data_label = filename.split(unique_id)[1].lstrip("_")
-
- if len(filename) == len(data_label):
- msg = (
- "\n\n[!] Possibly wrong participant or "
- "session in this directory?\n\n"
- f"Filepath: {filepath}\n\n"
- )
- raise Exception(msg)
-
- bidstag = ""
- for tag in data_label.split("_"):
- for prefix in ["task-", "run-", "acq-", "rec"]:
- if tag.startswith(prefix):
- bidstag += f"{tag}_"
- data_label = data_label.replace(f"{tag}_", "")
- data_label, json = strip_template(data_label, dir_path, filename)
-
- rpool, json_info, pipe_idx, node_name, data_label = json_outdir_ingress(
- rpool, filepath, exts, data_label, json
- )
-
- if (
- "template" in data_label
- and not json_info["Template"]
- == cfg.pipeline_setup["outdir_ingress"]["Template"]
- ):
- continue
- # Rename confounds to avoid confusion in nuisance regression
- if data_label.endswith("desc-confounds_timeseries"):
- data_label = "pipeline-ingress_desc-confounds_timeseries"
-
- if len(bidstag) > 1:
- # Remove tail symbol
- bidstag = bidstag[:-1]
- if bidstag.startswith("task-"):
- bidstag = bidstag.replace("task-", "")
-
- # Rename bold mask for CPAC naming convention
- # and to avoid collision with anat brain mask
- if data_label.endswith("desc-brain_mask") and filepath in outdir_func:
- data_label = data_label.replace("brain_mask", "bold_mask")
-
- try:
- pipe_x = rpool.get_pipe_number(pipe_idx)
- except ValueError:
- pipe_x = len(rpool.pipe_list)
- if filepath in outdir_anat:
- ingress = create_general_datasource(
- f"gather_anat_outdir_{data_label!s}_{pipe_x}"
- )
- ingress.inputs.inputnode.set(
- unique_id=unique_id,
- data=filepath,
- creds_path=creds_path,
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- )
- rpool.set_data(
- data_label,
- ingress,
- "outputspec.data",
- json_info,
- pipe_idx,
- node_name,
- f"outdir_{data_label}_ingress",
- inject=True,
- )
- else:
- if data_label.endswith("desc-preproc_bold"):
- func_key = data_label
- func_dict[bidstag] = {}
- func_dict[bidstag]["scan"] = str(filepath)
- func_dict[bidstag]["scan_parameters"] = json_info
- func_dict[bidstag]["pipe_idx"] = pipe_idx
- if data_label.endswith("desc-brain_mask"):
- data_label = data_label.replace("brain_mask", "bold_mask")
- try:
- func_paths[data_label].append(filepath)
- except:
- func_paths[data_label] = []
- func_paths[data_label].append(filepath)
-
- if func_dict:
- wf, rpool = func_outdir_ingress(
- wf,
- cfg,
- func_dict,
- rpool,
- unique_id,
- creds_path,
- part_id,
- func_key,
- func_paths,
- )
-
- if cfg.surface_analysis["freesurfer"]["ingress_reconall"]:
- rpool = ingress_freesurfer(
- wf, rpool, cfg, data_paths, unique_id, part_id, ses_id
- )
- return wf, rpool
-
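# Illustrative walk-through of how `ingress_output_dir` above splits an
# output-directory filename into a BIDS tag and a data label; the unique ID
# and filename are made up.
unique_id = "sub-01_ses-1"
filename = "sub-01_ses-1_task-rest_desc-preproc_bold"  # extensions already stripped
data_label = filename.split(unique_id)[1].lstrip("_")
bidstag = ""
for tag in data_label.split("_"):
    for prefix in ["task-", "run-", "acq-", "rec"]:
        if tag.startswith(prefix):
            bidstag += f"{tag}_"
            data_label = data_label.replace(f"{tag}_", "")
assert bidstag == "task-rest_"
assert data_label == "desc-preproc_bold"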
-
-def json_outdir_ingress(rpool, filepath, exts, data_label, json):
- desc_val = None
- for tag in data_label.split("_"):
- if "desc-" in tag:
- desc_val = tag
- break
- jsonpath = str(filepath)
- for ext in exts:
- jsonpath = jsonpath.replace(ext, "")
- jsonpath = f"{jsonpath}.json"
-
- if not os.path.exists(jsonpath):
- WFLOGGER.info(
- "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", filepath, jsonpath
- )
- json_info = {
- "Description": "This data was generated elsewhere and "
- "supplied by the user into this C-PAC run's "
- "output directory. This JSON file was "
- "automatically generated by C-PAC because a "
- "JSON file was not supplied with the data."
- }
- json_info = {**json_info, **json}
- write_output_json(json_info, jsonpath)
- else:
- json_info = read_json(jsonpath)
- json_info = {**json_info, **json}
- if "CpacProvenance" in json_info:
- if desc_val:
- # it's a C-PAC output, let's check for pipe_idx/strat integer
- # suffixes in the desc- entries.
- only_desc = str(desc_val)
-
- if only_desc[-1].isdigit():
- for idx in range(0, 3):
- # let's stop at 3, please don't run >999 strategies okay?
- if only_desc[-1].isdigit():
- only_desc = only_desc[:-1]
-
- if only_desc[-1] == "-":
- only_desc = only_desc.rstrip("-")
- else:
- msg = (
- "\n[!] Something went wrong with either "
- "reading in the output directory or when "
- "it was written out previously.\n\nGive "
- "this to your friendly local C-PAC "
- f"developer:\n\n{data_label!s}\n"
- )
- raise IOError(msg)
-
- # remove the integer at the end of the desc-* variant, we will
- # get the unique pipe_idx from the CpacProvenance below
- data_label = data_label.replace(desc_val, only_desc)
-
- # preserve cpac provenance/pipe_idx
- pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"])
- node_name = ""
-
- else:
- json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"]
- if "Description" not in json_info:
- json_info["Description"] = (
- "This data was generated elsewhere and "
- "supplied by the user into this C-PAC run's "
- "output directory. This JSON file was "
- "automatically generated by C-PAC because a "
- "JSON file was not supplied with the data."
- )
- pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"])
- node_name = f"{data_label}_ingress"
-
- return rpool, json_info, pipe_idx, node_name, data_label
-
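# Illustrative sketch of the desc-suffix stripping in `json_outdir_ingress`
# above: a hypothetical strategy-numbered desc- entity is reduced to its base
# form before the pipe_idx is regenerated from CpacProvenance.
only_desc = "desc-preproc-2"
while only_desc and only_desc[-1].isdigit():
    only_desc = only_desc[:-1]
only_desc = only_desc.rstrip("-")
assert only_desc == "desc-preproc"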
-
-def func_outdir_ingress(
- wf, cfg, func_dict, rpool, unique_id, creds_path, part_id, key, func_paths
-):
- pipe_x = len(rpool.pipe_list)
- ingress = create_func_datasource(
- func_dict, rpool, f"gather_func_outdir_{key}_{pipe_x}"
- )
- ingress.inputs.inputnode.set(
- subject=unique_id,
- creds_path=creds_path,
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- )
- rpool.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress")
- ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys()))
- rpool.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress")
-
- rpool.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress")
- rpool.set_data(
- "scan-params", ingress, "outputspec.scan_params", {}, "", "scan_params_ingress"
- )
- wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata(
- wf, cfg, rpool, func_dict, part_id, creds_path, key
- )
-
- # Have to do it this weird way to save the parsed BIDS tag & filepath
- mask_paths_key = (
- "desc-bold_mask"
- if "desc-bold_mask" in func_paths
- else "space-template_desc-bold_mask"
- )
- ts_paths_key = "pipeline-ingress_desc-confounds_timeseries"
-
- # Connect func data with appropriate scan name
- iterables = pe.Node(
- Function(
- input_names=["scan", "mask_paths", "ts_paths"],
- output_names=["out_scan", "mask", "confounds"],
- function=set_iterables,
- ),
- name=f"set_iterables_{pipe_x}",
- )
- iterables.inputs.mask_paths = func_paths[mask_paths_key]
- iterables.inputs.ts_paths = func_paths[ts_paths_key]
- wf.connect(ingress, "outputspec.scan", iterables, "scan")
-
- for key in func_paths:
- if key in (mask_paths_key, ts_paths_key):
- ingress_func = create_general_datasource(f"ingress_func_data_{key}")
- ingress_func.inputs.inputnode.set(
- unique_id=unique_id,
- creds_path=creds_path,
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- )
- wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan")
- if key == mask_paths_key:
- wf.connect(iterables, "mask", ingress_func, "inputnode.data")
- rpool.set_data(
- key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress"
- )
- elif key == ts_paths_key:
- wf.connect(iterables, "confounds", ingress_func, "inputnode.data")
- rpool.set_data(
- key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress"
- )
-
- return wf, rpool
-
-
-def set_iterables(scan, mask_paths=None, ts_paths=None):
- # match scan with filepath to get filepath
- mask_path = [path for path in mask_paths if scan in path]
- ts_path = [path for path in ts_paths if scan in path]
-
- return (scan, mask_path[0], ts_path[0])
-
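# Illustrative sketch of the matching done by `set_iterables` above; the scan
# name and paths are hypothetical.
scan = "task-rest_run-1"
mask_paths = ["/out/func/sub-01_task-rest_run-1_space-template_desc-bold_mask.nii.gz"]
ts_paths = ["/out/func/sub-01_task-rest_run-1_desc-confounds_timeseries.tsv"]
mask_path = [path for path in mask_paths if scan in path]
ts_path = [path for path in ts_paths if scan in path]
assert mask_path[0].endswith("desc-bold_mask.nii.gz")
assert ts_path[0].endswith("desc-confounds_timeseries.tsv")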
-
-def strip_template(data_label, dir_path, filename):
- json = {}
- # rename to template
- for prefix in ["space-", "from-", "to-"]:
- for bidstag in data_label.split("_"):
- if bidstag.startswith(prefix):
- template_key, template_val = bidstag.split("-")
- template_name, _template_desc = lookup_identifier(template_val)
- if template_name:
- json["Template"] = template_val
- data_label = data_label.replace(template_val, "template")
- elif bidstag.startswith("res-"):
- res_key, res_val = bidstag.split("-")
- json["Resolution"] = res_val
- data_label = data_label.replace(bidstag, "")
- if data_label.find("__"):
- data_label = data_label.replace("__", "_")
- return data_label, json
-
-
-def ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path=None):
- # ingress config file paths
- # TODO: may want to change the resource keys for each to include one level up in the YAML as well
-
- import pandas as pd
- import pkg_resources as p
-
- template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv")
- template_df = pd.read_csv(template_csv, keep_default_na=False)
-
- for row in template_df.itertuples():
- key = row.Key
- val = row.Pipeline_Config_Entry
- val = cfg.get_nested(cfg, [x.lstrip() for x in val.split(",")])
- resolution = row.Intended_Resolution_Config_Entry
- desc = row.Description
-
- if not val:
- continue
-
- if resolution:
- res_keys = [x.lstrip() for x in resolution.split(",")]
- tag = res_keys[-1]
- json_info = {}
-
- if "$FSLDIR" in val:
- val = val.replace("$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"])
- if "$priors_path" in val:
- priors_path = (
- cfg.segmentation["tissue_segmentation"]["FSL-FAST"]["use_priors"][
- "priors_path"
- ]
- or ""
- )
- if "$FSLDIR" in priors_path:
- priors_path = priors_path.replace(
- "$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"]
- )
- val = val.replace("$priors_path", priors_path)
- if "${resolution_for_anat}" in val:
- val = val.replace(
- "${resolution_for_anat}",
- cfg.registration_workflows["anatomical_registration"][
- "resolution_for_anat"
- ],
- )
- if "${func_resolution}" in val:
- val = val.replace(
- "${func_resolution}",
- cfg.registration_workflows["functional_registration"][
- "func_registration_to_template"
- ]["output_resolution"][tag],
- )
-
- if desc:
- template_name, _template_desc = lookup_identifier(val)
- if template_name:
- desc = f"{template_name} - {desc}"
- json_info["Description"] = f"{desc} - {val}"
- if resolution:
- resolution = cfg.get_nested(cfg, res_keys)
- json_info["Resolution"] = resolution
-
- resampled_template = pe.Node(
- Function(
- input_names=["resolution", "template", "template_name", "tag"],
- output_names=["resampled_template"],
- function=resolve_resolution,
- as_module=True,
- ),
- name="resampled_" + key,
- )
-
- resampled_template.inputs.resolution = resolution
- resampled_template.inputs.template = val
- resampled_template.inputs.template_name = key
- resampled_template.inputs.tag = tag
-
- # the set_data below is set up a little differently, because we are
- # injecting and also over-writing already-existing entries;
- # the other alternative would have been to ingress into the
- # resampled_template node from the already-existing entries, but we
- # didn't do that here
- rpool.set_data(
- key,
- resampled_template,
- "resampled_template",
- json_info,
- "",
- "template_resample",
- ) # pipe_idx (after the blank json {}) should be the previous strat that you want deleted, because you're not connecting this the regular way; you have to do it manually
-
- elif val:
- config_ingress = create_general_datasource(f"gather_{key}")
- config_ingress.inputs.inputnode.set(
- unique_id=unique_id,
- data=val,
- creds_path=creds_path,
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- )
- rpool.set_data(
- key,
- config_ingress,
- "outputspec.data",
- json_info,
- "",
- f"{key}_config_ingress",
- )
- # templates, resampling from config
- """
- template_keys = [
- ("anat", ["network_centrality", "template_specification_file"]),
- ("anat", ["nuisance_corrections", "2-nuisance_regression",
- "lateral_ventricles_mask"]),
- ("anat",
- ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors",
- "CSF_path"]),
- ("anat",
- ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors",
- "GM_path"]),
- ("anat",
- ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors",
- "WM_path"]),
- ("anat",
- ["segmentation", "tissue_segmentation", "Template_Based", "CSF"]),
- ("anat",
- ["segmentation", "tissue_segmentation", "Template_Based", "GRAY"]),
- ("anat",
- ["segmentation", "tissue_segmentation", "Template_Based", "WHITE"]),
- ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_ACPC_template"]),
- ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_brain_ACPC_template"]),
- ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_ACPC_template"]),
- ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_brain_ACPC_template"])]
-
- def get_nested_attr(c, template_key):
- attr = getattr(c, template_key[0])
- keys = template_key[1:]
-
- def _get_nested(attr, keys):
- if len(keys) > 1:
- return (_get_nested(attr[keys[0]], keys[1:]))
- elif len(keys):
- return (attr[keys[0]])
- else:
- return (attr)
-
- return (_get_nested(attr, keys))
-
- def set_nested_attr(c, template_key, value):
- attr = getattr(c, template_key[0])
- keys = template_key[1:]
-
- def _set_nested(attr, keys):
- if len(keys) > 1:
- return (_set_nested(attr[keys[0]], keys[1:]))
- elif len(keys):
- attr[keys[0]] = value
- else:
- return (attr)
-
- return (_set_nested(attr, keys))
-
- for key_type, key in template_keys:
- attr = cfg.get_nested(cfg, key)
- if isinstance(attr, str) or attr == None:
- node = create_check_for_s3_node(
- key[-1],
- attr, key_type,
- data_paths['creds_path'],
- cfg.pipeline_setup['working_directory']['path'],
- map_node=False
- )
- cfg.set_nested(cfg, key, node)
-
- template_keys_in_list = [
- ("anat",
- ["segmentation", "tissue_segmentation", "ANTs_Prior_Based",
- "template_brain_list"]),
- ("anat",
- ["segmentation", "tissue_segmentation", "ANTs_Prior_Based",
- "template_segmentation_list"]),
- ]
-
- for key_type, key in template_keys_in_list:
- node = create_check_for_s3_node(
- key[-1],
- cfg.get_nested(cfg, key), key_type,
- data_paths['creds_path'],
- cfg.pipeline_setup['working_directory']['path'],
- map_node=True
- )
- cfg.set_nested(cfg, key, node)
- """
-
- return rpool
-
-
-def initiate_rpool(wf, cfg, data_paths=None, part_id=None):
- """
- Initialize a new ResourcePool.
-
- data_paths format:
- {'anat': {
- 'T1w': '{T1w path}',
- 'T2w': '{T2w path}'
- },
- 'creds_path': {None OR path to credentials CSV},
- 'func': {
- '{scan ID}':
- {
- 'scan': '{path to BOLD}',
- 'scan_parameters': {scan parameter dictionary}
- }
- },
- 'site_id': 'site-ID',
- 'subject_id': 'sub-01',
- 'unique_id': 'ses-1',
- 'derivatives_dir': '{derivatives_dir path}'}
- """
- # TODO: refactor further, integrate with the ingress_data functionality
- # TODO: used for BIDS-Derivatives (below), and possible refactoring of
- # TODO: the raw data config to use 'T1w' label instead of 'anat' etc.
-
- if data_paths:
- part_id = data_paths["subject_id"]
- ses_id = data_paths["unique_id"]
- if "creds_path" not in data_paths:
- creds_path = None
- else:
- creds_path = data_paths["creds_path"]
- unique_id = f"{part_id}_{ses_id}"
-
- elif part_id:
- unique_id = part_id
- creds_path = None
-
- rpool = ResourcePool(name=unique_id, cfg=cfg)
-
- if data_paths:
- # ingress outdir
- try:
- if (
- data_paths["derivatives_dir"]
- and cfg.pipeline_setup["outdir_ingress"]["run"]
- ):
- wf, rpool = ingress_output_dir(
- wf,
- cfg,
- rpool,
- unique_id,
- data_paths,
- part_id,
- ses_id,
- creds_path=None,
- )
- except:
- rpool = ingress_raw_anat_data(
- wf, rpool, cfg, data_paths, unique_id, part_id, ses_id
- )
- if "func" in data_paths:
- wf, rpool, diff, blip, fmap_rp_list = ingress_raw_func_data(
- wf, rpool, cfg, data_paths, unique_id, part_id, ses_id
- )
-
- # grab any file paths from the pipeline config YAML
- rpool = ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path)
-
- # output files with 4 different scans
-
- return (wf, rpool)
-
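# Illustrative sketch of a `data_paths` dict in the layout documented by
# `initiate_rpool` above; every path and ID is hypothetical.
example_data_paths = {
    "anat": {"T1w": "/data/sub-01/ses-1/anat/sub-01_ses-1_T1w.nii.gz"},
    "creds_path": None,
    "func": {
        "task-rest_run-1": {
            "scan": "/data/sub-01/ses-1/func/sub-01_ses-1_task-rest_run-1_bold.nii.gz",
            "scan_parameters": {"RepetitionTime": 2.0},
        }
    },
    "site_id": "site-01",
    "subject_id": "sub-01",
    "unique_id": "ses-1",
    "derivatives_dir": "/out/sub-01_ses-1",
}
# wf, rpool = initiate_rpool(wf, cfg, data_paths=example_data_paths)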
-
-def run_node_blocks(blocks, data_paths, cfg=None):
- import os
-
- from CPAC.pipeline import nipype_pipeline_engine as pe
- from CPAC.pipeline.engine import NodeBlock
-
- if not cfg:
- cfg = {
- "pipeline_setup": {
- "working_directory": {"path": os.getcwd()},
- "log_directory": {"path": os.getcwd()},
- }
- }
-
- # TODO: WE HAVE TO PARSE OVER UNIQUE ID'S!!!
- _, rpool = initiate_rpool(cfg, data_paths)
-
- wf = pe.Workflow(name="node_blocks")
- wf.base_dir = cfg.pipeline_setup["working_directory"]["path"]
- wf.config["execution"] = {
- "hash_method": "timestamp",
- "crashdump_dir": cfg.pipeline_setup["log_directory"]["path"],
- }
-
- run_blocks = []
- if rpool.check_rpool("desc-preproc_T1w"):
- WFLOGGER.info("Preprocessed T1w found, skipping anatomical preprocessing.")
- else:
- run_blocks += blocks[0]
- if rpool.check_rpool("desc-preproc_bold"):
- WFLOGGER.info("Preprocessed BOLD found, skipping functional preprocessing.")
- else:
- run_blocks += blocks[1]
-
- for block in run_blocks:
- wf = NodeBlock(
- block, debug=cfg["pipeline_setup", "Debugging", "verbose"]
- ).connect_block(wf, cfg, rpool)
- rpool.gather_pipes(wf, cfg)
-
- wf.run()
-
-
-class NodeData:
- r"""Attribute access for ResourcePool.get_data outputs.
-
- Class to hold outputs of CPAC.pipeline.engine.ResourcePool().get_data(), so one can
- do ``node_data = strat_pool.node_data(resource)`` and have ``node_data.node`` and
- ``node_data.out`` instead of doing ``node, out = strat_pool.get_data(resource)``
- and needing two variables (``node`` and ``out``) to store that information.
-
- Also includes ``variant`` attribute providing the resource's self-keyed value
- within its ``CpacVariant`` dictionary.
-
- Examples
- --------
- >>> rp = ResourcePool()
- >>> rp.node_data(None)
- NotImplemented (NotImplemented)
-
- >>> rp.set_data('test',
- ... pe.Node(Function(input_names=[]), 'test'),
- ... 'b', [], 0, 'test')
- >>> rp.node_data('test')
- test (b)
- >>> rp.node_data('test').out
- 'b'
-
- >>> try:
- ... rp.node_data('b')
- ... except LookupError as lookup_error:
- ... print(str(lookup_error).strip().split('\n')[0].strip())
- [!] C-PAC says: None of the listed resources are in the resource pool:
- """
-
- # pylint: disable=too-few-public-methods
- def __init__(self, strat_pool=None, resource=None, **kwargs):
- self.node = NotImplemented
- self.out = NotImplemented
- if strat_pool is not None and resource is not None:
- self.node, self.out = strat_pool.get_data(resource, **kwargs)
-
- def __repr__(self): # noqa: D105
- return f'{getattr(self.node, "name", str(self.node))} ({self.out})'
diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py
new file mode 100644
index 0000000000..534c9f7450
--- /dev/null
+++ b/CPAC/pipeline/engine/__init__.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2021-2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""C-PAC engine."""
+
+from .nodeblock import NodeBlock
+from .resource import ResourcePool, StratPool
+
+__all__ = [
+ "NodeBlock",
+ "ResourcePool",
+ "StratPool",
+]
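
With the new package in place, the engine's public classes are re-exported from a single namespace, so a downstream import would look like the following sketch (call sites themselves are not changed in this hunk):

```python
from CPAC.pipeline.engine import NodeBlock, ResourcePool, StratPool
```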
diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py
new file mode 100644
index 0000000000..e68bfbf0d2
--- /dev/null
+++ b/CPAC/pipeline/engine/nodeblock.py
@@ -0,0 +1,349 @@
+# Copyright (C) 2023-2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""Classes and decorator for :py:class:`NodeBlock` s and :py:class:`NodeBlockFunction` s."""
+
+from typing import Any, Callable, Optional, TYPE_CHECKING
+
+import yaml
+from nipype import config, logging # type: ignore [import-untyped]
+from nipype.pipeline.engine import Workflow # type: ignore[import-untyped]
+
+from CPAC.utils.configuration.configuration import Configuration
+from CPAC.utils.monitoring import (
+ WFLOGGER,
+)
+
+if TYPE_CHECKING:
+ from CPAC.pipeline.engine.resource import ResourceData, StratPool
+
+NODEBLOCK_INPUTS = list[str | list | tuple]
+NODEBLOCK_OUTPUTS = list[str] | dict[str, Any]
+PIPELINE_BLOCKS = list["NodeBlockFunction | PIPELINE_BLOCKS"]
+
+
+class NodeBlockFunction:
+ """Store a reference to the nodeblock function and all of its metadata."""
+
+ def __init__(
+ self,
+ func: Callable,
+ name: str,
+ config: Optional[list[str]] = None,
+ switch: Optional[list[str] | list[list[str]]] = None,
+ option_key: Optional[str | list[str]] = None,
+ option_val: Optional[str | list[str]] = None,
+ inputs: Optional[NODEBLOCK_INPUTS] = None,
+ outputs: Optional[NODEBLOCK_OUTPUTS] = None,
+ ) -> None:
+ self.func = func
+ """:py:class:`Nodeblock` function reference."""
+ self.name: str = name
+ """Used in the graph and logging to identify the :py:class:`NodeBlock` and its
+ component :py:class:`~nipype.pipeline.engine.Node` s."""
+ self.config: Optional[list[str]] = config
+ """
+ Indicates the nested keys in a C-PAC pipeline
+ :py:class:`~CPAC.utils.configuration.Configuration` should configure a
+ :py:class:`NodeBlock` built from this function. If `config` is set to ``None``,
+ then all other :py:class:`~CPAC.utils.configuration.Configuration` -related
+ entities must be specified from the root of the :py:class:`~CPAC.utils.configuration.Configuration` .
+ """
+ self.switch: Optional[list[str] | list[list[str]]] = switch
+ """
+ Indicates any keys that should evaluate to ``True`` for this :py:class:`NodeBlock`
+ to be active. A list of lists of strings indicates multiple `switch` es
+ that must all be ``True`` to run, and is currently only an option if `config` is
+ set to ``None``.
+ """
+ self.option_key: Optional[str | list[str]] = option_key
+ """
+ Indicates the nested keys (starting at the nested key indicated by `config`)
+ that should configure this :py:class:`NodeBlock`.
+ """
+ self.option_val: Optional[str | list[str]] = option_val
+ """Indicates values for which this :py:class:`NodeBlock` should be active."""
+ self.inputs: list[str | list | tuple] = inputs if inputs else []
+ """:py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating
+ resources needed for the :py:class:`NodeBlock`'s functionality."""
+ self.outputs: list[str] | dict[str, Any] = outputs if outputs else []
+ """
+ :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating
+ resources generated or updated by the :py:class:`NodeBlock`, optionally
+ including metadata for the outputs' respective sidecars.
+ """
+
+ # Forward function attributes similar to functools.update_wrapper:
+ # https://docs.python.org/3/library/functools.html#functools.update_wrapper
+ self.__module__ = func.__module__
+ self.__name__ = func.__name__
+ self.__qualname__ = func.__qualname__
+ self.__annotations__ = func.__annotations__
+ self.__doc__ = "".join(
+ [
+                _.replace("        ", "")
+ for _ in [func.__doc__, "", "", NodeBlockFunction.__call__.__doc__]
+ if _ is not None
+ ]
+ ).rstrip()
+
+ def __call__(
+ self,
+ wf: Workflow,
+ cfg: Configuration,
+ strat_pool: "StratPool",
+ pipe_num: Optional[int | str],
+ opt: Optional[str] = None,
+ ) -> tuple[Workflow, dict[str, "ResourceData"]]:
+ """Call a :py:class:`NodeBlockFunction`.
+
+ All :py:class:`NodeBlockFunction` s have the same signature.
+ """
+ return self.func(wf, cfg, strat_pool, pipe_num, opt)
+
+ def legacy_nodeblock_dict(self):
+ """Return :py:class:`NodeBlock` metadata as a dictionary.
+
+ Helper for compatibility reasons.
+ """
+ return {
+ "name": self.name,
+ "config": self.config,
+ "switch": self.switch,
+ "option_key": self.option_key,
+ "option_val": self.option_val,
+ "inputs": self.inputs,
+ "outputs": self.outputs,
+ }
+
+ def __repr__(self) -> str:
+ """Return reproducible string representation of a :py:class:`NodeBlockFunction`."""
+ return (
+ f"NodeBlockFunction({self.func.__module__}."
+ f'{self.func.__name__}, "{self.name}", '
+ f"config={self.config}, switch={self.switch}, "
+ f"option_key={self.option_key}, option_val="
+ f"{self.option_val}, inputs={self.inputs}, "
+ f"outputs={self.outputs})"
+ )
+
+ def __str__(self) -> str:
+ """Return string representation of a :py:class:`NodeBlockFunction`."""
+ return f"NodeBlockFunction({self.name})"
+
+
+class NodeBlock:
+ """A :py:class:`~nipype.pipeline.engine.Workflow` subgraph composed of :py:class:`NodeBlockFunction` s."""
+
+ def __init__(
+ self,
+ node_block_functions: NodeBlockFunction | PIPELINE_BLOCKS,
+ debug: bool = False,
+ ) -> None:
+ """Create a :py:class:`NodeBlock` from a list of :py:class:`NodeBlockFunction` s."""
+ if not isinstance(node_block_functions, list):
+ node_block_functions = [node_block_functions]
+
+ self.node_blocks: dict[str, Any] = {}
+
+ for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking.
+ self.input_interface = []
+ if isinstance(node_block_function, tuple):
+ self.input_interface = node_block_function[1]
+ node_block_function = node_block_function[0] # noqa: PLW2901
+ if not isinstance(self.input_interface, list):
+ self.input_interface = [self.input_interface]
+
+ if not isinstance(node_block_function, NodeBlockFunction):
+ # If the object is a plain function `__name__` will be more useful than `str()`
+ obj_str = (
+ node_block_function.__name__ # type: ignore [attr-defined]
+ if hasattr(node_block_function, "__name__")
+ else str(node_block_function)
+ )
+ msg = f'Object is not a nodeblock: "{obj_str}"'
+ raise TypeError(msg)
+
+ name = node_block_function.name
+ self.name = name
+ self.node_blocks[name] = {}
+
+ if self.input_interface:
+ for interface in self.input_interface:
+ for orig_input in node_block_function.inputs:
+ if isinstance(orig_input, tuple):
+ list_tup = list(orig_input)
+ if interface[0] in list_tup:
+ list_tup.remove(interface[0])
+ list_tup.append(interface[1])
+ node_block_function.inputs.remove(orig_input)
+ node_block_function.inputs.append(tuple(list_tup))
+ elif orig_input == interface[0]:
+ node_block_function.inputs.remove(interface[0])
+ node_block_function.inputs.append(interface[1])
+
+ for key, val in node_block_function.legacy_nodeblock_dict().items():
+ self.node_blocks[name][key] = val
+
+ self.node_blocks[name]["block_function"] = node_block_function
+
+ # TODO: fix/replace below
+ self.outputs: dict[str, Optional[str]] = {}
+ for out in node_block_function.outputs:
+ self.outputs[out] = None
+
+ self.options: list[str] | dict[str, Any] = ["base"]
+ if node_block_function.outputs is not None:
+ self.options = node_block_function.outputs
+
+ WFLOGGER.info("Connecting %s...", name)
+ if debug:
+ config.update_config({"logging": {"workflow_level": "DEBUG"}})
+ logging.update_logging(config)
+ WFLOGGER.debug(
+ '"inputs": %s\n\t "outputs": %s%s',
+ node_block_function.inputs,
+ list(self.outputs.keys()),
+ f'\n\t"options": {self.options}'
+ if self.options != ["base"]
+ else "",
+ )
+ config.update_config({"logging": {"workflow_level": "INFO"}})
+ logging.update_logging(config)
+
+ def check_output(self, outputs: NODEBLOCK_OUTPUTS, label: str, name: str) -> None:
+ """Check if a label is listed in a :py:class:`NodeBlock` 's `outputs`.
+
+ Raises
+ ------
+ NameError
+ If a mismatch is found.
+ """
+ if label not in outputs:
+ msg = (
+ f'\n[!] Output name "{label}" in the block '
+ "function does not match the outputs list "
+ f'{outputs} in Node Block "{name}"\n'
+ )
+ raise NameError(msg)
+
+ @staticmethod
+ def list_blocks(
+ pipeline_blocks: PIPELINE_BLOCKS, indent: Optional[int] = None
+ ) -> str:
+ """List :py:class:`NodeBlockFunction` s line by line.
+
+ Parameters
+ ----------
+ pipeline_blocks
+ list of :py:class:`NodeBlockFunction` s
+
+ indent
+ number of spaces after a tab indent
+
+ Returns
+ -------
+ str
+ formatted list of :py:class:`NodeBlockFunction` s
+ """
+ blockstring = yaml.dump(
+ [
+ getattr(
+ block,
+ "__name__",
+ getattr(
+ block,
+ "name",
+ yaml.safe_load(NodeBlock.list_blocks(list(block)))
+ if isinstance(block, (tuple, list, set))
+ else str(block),
+ ),
+ )
+ for block in pipeline_blocks
+ ]
+ )
+ if isinstance(indent, int):
+ blockstring = "\n".join(
+ [
+ "\t" + " " * indent + line.replace("- - ", "- ")
+ for line in blockstring.split("\n")
+ ]
+ )
+ return blockstring
+
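
A hedged illustration of `NodeBlock.list_blocks` above, assuming two nodeblock-decorated functions whose underlying functions are named `block_a` and `block_b` (hypothetical names):

```python
# Each block contributes its __name__ (or .name) to a YAML list.
listing = NodeBlock.list_blocks([block_a, block_b], indent=2)
# yaml.dump renders the names as "- block_a\n- block_b\n"; with indent=2,
# each line is prefixed with a tab plus two spaces for log formatting.
```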
+
+def nodeblock(
+ name: Optional[str] = None,
+ config: Optional[list[str]] = None,
+ switch: Optional[list[str] | list[list[str]]] = None,
+ option_key: Optional[str | list[str]] = None,
+ option_val: Optional[str | list[str]] = None,
+ inputs: Optional[NODEBLOCK_INPUTS] = None,
+ outputs: Optional[list[str] | dict[str, Any]] = None,
+):
+    """Define a :py:class:`NodeBlockFunction` .
+
+    Connects it to the pipeline :py:class:`~CPAC.utils.configuration.Configuration` and to other :py:class:`NodeBlockFunction` s.
+
+ Parameters
+ ----------
+ name
+ Used in the graph and logging to identify the :py:class:`NodeBlock` and its
+ component :py:class:`~nipype.pipeline.engine.Node` s.
+ The :py:class:`NodeBlockFunction`'s `.__name__` is used if `name` is not
+ provided.
+
+ config
+ Indicates the nested keys in a C-PAC pipeline
+        :py:class:`~CPAC.utils.configuration.Configuration` should configure a
+        :py:class:`NodeBlock` built from this :py:class:`NodeBlockFunction`. If `config`
+        is set to ``None``, then all other
+        :py:class:`~CPAC.utils.configuration.Configuration` -related entities
+        must be specified from the root of the
+        :py:class:`~CPAC.utils.configuration.Configuration` .
+
+ switch
+ Indicates any keys that should evaluate to ``True`` for this
+ :py:class:`NodeBlock` to be active. A list of lists of strings indicates
+ multiple switches that must all be ``True`` to run, and is currently only an
+ option if config is set to ``None``.
+
+ option_key
+ Indicates the nested keys (starting at the nested key indicated by `config`)
+ that should configure this :py:class:`NodeBlock`.
+
+ option_val
+ Indicates values for which this :py:class:`NodeBlock` should be active.
+
+ inputs
+ :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating files needed for the :py:class:`NodeBlock` 's
+ functionality.
+
+ outputs
+ :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating files
+ generated or updated by the :py:class:`NodeBlock`, optionally including metadata
+ for the `outputs` ' respective sidecars.
+ """
+ return lambda func: NodeBlockFunction(
+ func,
+ name if name is not None else func.__name__,
+ config,
+ switch,
+ option_key,
+ option_val,
+ inputs,
+ outputs,
+ )
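
A hedged sketch of the decorator in use; the block name, configuration key, and resource labels below are illustrative placeholders, not part of this changeset:

```python
from CPAC.pipeline.engine.nodeblock import nodeblock


@nodeblock(
    name="example_block",
    config=["example_section"],  # hypothetical Configuration key
    switch=["run"],              # must evaluate to True for the block to run
    inputs=["desc-preproc_T1w"],
    outputs=["desc-example_T1w"],
)
def example_block(wf, cfg, strat_pool, pipe_num, opt=None):
    """Every NodeBlockFunction is called as (wf, cfg, strat_pool, pipe_num, opt)."""
    node, out = strat_pool.get_data("desc-preproc_T1w")
    # A real block would insert nipype Nodes here; this sketch just passes
    # its input straight through as the declared output.
    outputs = {"desc-example_T1w": (node, out)}
    return wf, outputs
```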
diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py
new file mode 100644
index 0000000000..988d4bdc04
--- /dev/null
+++ b/CPAC/pipeline/engine/resource.py
@@ -0,0 +1,3261 @@
+# Copyright (C) 2021-2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+""":py:class:`Resource` s and :py:class:`ResourcePool` s for C-PAC."""
+
+import ast
+from collections.abc import KeysView
+from copy import deepcopy
+import hashlib
+from itertools import chain
+import json
+import os
+from pathlib import Path
+import re
+from types import NoneType
+from typing import Any, Literal, NamedTuple, Optional, overload
+
+from nipype.interfaces import utility as util # type: ignore [import-untyped]
+from nipype.interfaces.utility import Rename # type: ignore [import-untyped]
+from nipype.pipeline import engine as pe # type: ignore [import-untyped]
+
+from CPAC.image_utils.spatial_smoothing import spatial_smoothing
+from CPAC.image_utils.statistical_transforms import (
+ fisher_z_score_standardize,
+ z_score_standardize,
+)
+from CPAC.pipeline.check_outputs import ExpectedOutputs
+from CPAC.pipeline.engine.nodeblock import (
+ NodeBlock,
+ NODEBLOCK_INPUTS,
+ NODEBLOCK_OUTPUTS,
+ NodeBlockFunction,
+ PIPELINE_BLOCKS,
+)
+from CPAC.pipeline.utils import name_fork, source_set
+from CPAC.registration.registration import transform_derivative
+from CPAC.resources.templates.lookup_table import lookup_identifier
+from CPAC.utils.bids_utils import res_in_filename
+from CPAC.utils.configuration.configuration import Configuration, Preconfiguration
+from CPAC.utils.datasource import (
+ calc_delta_te_and_asym_ratio,
+ check_for_s3,
+ check_func_scan,
+ create_anat_datasource,
+ create_fmap_datasource,
+ create_general_datasource,
+ gather_echo_times,
+ get_fmap_phasediff_metadata,
+ get_rest,
+ resolve_resolution,
+)
+from CPAC.utils.interfaces.datasink import DataSink
+from CPAC.utils.interfaces.function import Function
+from CPAC.utils.monitoring import (
+ getLogger,
+ LOGTAIL,
+ UTLOGGER,
+ WARNING_FREESURFER_OFF_WITH_DATA,
+ WFLOGGER,
+)
+from CPAC.utils.outputs import Outputs
+from CPAC.utils.typing import LIST_OF_LIST_OF_STR, PIPE_IDX
+from CPAC.utils.utils import (
+ check_prov_for_regtool,
+ create_id_string,
+ get_last_prov_entry,
+ get_scan_params,
+ read_json,
+ write_output_json,
+)
+
+EXTS = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"]
+POOL_DICT = dict[str | tuple, "STRAT_DICT"]
+STRAT_DICT = dict[str | tuple, "Resource"]
+
+
+class DataPaths:
+ """Store subject-session specific data paths."""
+
+ def __init__(
+ self, *, data_paths: Optional[dict] = None, part_id: Optional[str] = ""
+ ) -> None:
+ """Initialize a `DataPaths` instance."""
+ if not data_paths:
+ data_paths = {}
+ if part_id and "part_id" in data_paths and part_id != data_paths["part_id"]:
+ WFLOGGER.warning(
+ "both 'part_id' (%s) and data_paths['part_id'] (%s) provided. "
+ "Using '%s'.",
+ part_id,
+ data_paths["part_id"],
+ part_id,
+ )
+ anat: dict[str, str] | str = data_paths.get("anat", {})
+ if isinstance(anat, str):
+ anat = {"T1": anat}
+ self.anat: dict[str, str] = anat
+ self.creds_path: Optional[str] = data_paths.get("creds_path")
+ self.fmap: Optional[dict] = data_paths.get("fmap")
+ self.func: dict[str, dict[str, str | dict]] = data_paths.get("func", {})
+ self.part_id: str = data_paths.get("subject_id", "")
+ self.site_id: str = data_paths.get("site_id", "")
+ self.ses_id: str = data_paths.get("unique_id", "")
+ self.unique_id: str = "_".join([self.part_id, self.ses_id])
+ self.derivatives_dir: Optional[str] = data_paths.get("derivatives_dir")
+
+ def __repr__(self) -> str:
+ """Return reproducible string representation of `DataPaths` instance."""
+ return f"DataPaths(data_paths={self.as_dict()})"
+
+ def __str__(self) -> str:
+ """Return string representation of a `DataPaths` instance."""
+        return f"<DataPaths: {self.as_dict()}>"
+
+ def as_dict(self) -> dict:
+ """Return a `data_paths` dictionary.
+
+ `data_paths` format::
+
+ {"anat": {"T1w": "{T1w path}", "T2w": "{T2w path}"},
+ "creds_path": {None OR path to credentials CSV},
+ "func": {
+ "{scan ID}": {
+ "scan": "{path to BOLD}",
+ "scan_parameters": {scan parameter dictionary},
+ }
+ },
+ "site_id": "site-ID",
+ "subject_id": "sub-01",
+ "unique_id": "ses-1",
+ "derivatives_dir": "{derivatives_dir path}",}
+ """
+ return {
+ k: v
+ for k, v in {
+ key: getattr(self, key)
+ for key in [
+ "anat",
+ "creds_path",
+ "func",
+ "site_id",
+ "subject_id",
+ "unique_id",
+ "derivatives_dir",
+ ]
+ }.items()
+ if v
+ }
+
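
A hedged example of constructing `DataPaths` from the dictionary format documented in `as_dict` above; the file paths and IDs are placeholders:

```python
dp = DataPaths(
    data_paths={
        # a bare string for "anat" is normalized to {"T1": ...}
        "anat": "/data/sub-01/ses-1/anat/sub-01_ses-1_T1w.nii.gz",
        "creds_path": None,
        "func": {},
        "site_id": "site-1",
        "subject_id": "sub-01",
        "unique_id": "ses-1",
    }
)
assert dp.anat == {"T1": "/data/sub-01/ses-1/anat/sub-01_ses-1_T1w.nii.gz"}
assert dp.unique_id == "sub-01_ses-1"  # part_id and ses_id joined with "_"
```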
+
+@Function.sig_imports(["from typing import Optional"])
+def set_iterables(
+ scan: str,
+ mask_paths: Optional[list[str]] = None,
+ ts_paths: Optional[list[str]] = None,
+) -> tuple[str, str, str]:
+ """Match scan with filepath to get filepath."""
+ if mask_paths is None:
+ mask_paths = []
+ if ts_paths is None:
+ ts_paths = []
+ mask_path = [path for path in mask_paths if scan in path]
+ ts_path = [path for path in ts_paths if scan in path]
+
+ return (scan, mask_path[0], ts_path[0])
+
+
+def strip_template(data_label: str) -> tuple[str, dict[str, str]]:
+ """Strip a template name from a data label to use as a :py:class:`Resource` key."""
+ json = {}
+ # rename to template
+ for prefix in ["space-", "from-", "to-"]:
+ for bidstag in data_label.split("_"):
+ if bidstag.startswith(prefix):
+ _template_key, template_val = bidstag.split("-")
+ template_name, _template_desc = lookup_identifier(template_val)
+ if template_name:
+ json["Template"] = template_val
+ data_label = data_label.replace(template_val, "template")
+ elif bidstag.startswith("res-"):
+ _res_key, res_val = bidstag.split("-")
+ json["Resolution"] = res_val
+ data_label = data_label.replace(bidstag, "")
+ if data_label.find("__"):
+ data_label = data_label.replace("__", "_")
+ return data_label, json
+
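
A hedged illustration of `strip_template`, assuming the space value resolves in C-PAC's template lookup table:

```python
label, meta = strip_template("space-MNI152NLin6ASym_res-2_desc-preproc_bold")
# label -> "space-template_desc-preproc_bold"
# meta  -> {"Template": "MNI152NLin6ASym", "Resolution": "2"}
```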
+
+class ResourceData(NamedTuple):
+ """Attribute and tuple access for `ResourceData`."""
+
+ node: pe.Node
+ """Resource :py:class:`~nipype.pipeline.engine.Node`."""
+ out: str
+ """Output key."""
+
+
+class Resource:
+ """A single `Resource` and its methods."""
+
+ def __init__(self, data: tuple[pe.Node, str], json: dict) -> None:
+ """Initialize a `Resource`."""
+ self.data = ResourceData(*data)
+ """Tuple of source :py:class:`~nipype.pipeline.engine.Node` and output key."""
+ self._json: dict = json
+ """Metadata."""
+ self._keys = {"data", "json"}
+ """Dictionary-style subscriptable keys."""
+
+ def keys(self) -> list[str]:
+ """Return list of subscriptable keys."""
+ return list(self._keys)
+
+ def __contains__(self, item: Any) -> bool:
+ """Return ``True`` if `item` in :py:meth:`Resource.keys()`, ``False`` otherwise."""
+ return item in self.keys()
+
+ def __getitem__(self, name: str) -> Any:
+ """Provide legacy dict-style get access."""
+ if name in self.keys():
+ return getattr(self, name)
+ msg = f"Key '{name}' not set in {self}."
+ raise KeyError(msg)
+
+ def __repr__(self) -> str:
+ """Return reproducible string for `Resource`."""
+ positional = f"Resource(data={self.data}, json={self.json}"
+ kw = ", ".join(
+ f"{key}={getattr(self, key)}"
+ for key in self.keys()
+ if key not in ["data", "json"]
+ )
+ return f"{positional}{kw})"
+
+ def __setitem__(self, name: str, value: Any) -> None:
+ """Provide legacy dict-style set access for `Resource`."""
+ setattr(self, name, value)
+ if name not in self.keys():
+ self._keys.add(name)
+
+ def __str__(self) -> str:
+ """Return string representation of `Resource`."""
+ return f"{self.data[0]}"
+
+ def get_json(self) -> dict[str | tuple, Any]:
+ """Return a deep copy of `Resource` JSON."""
+ UTLOGGER.debug(
+ "%s is a deep copy of the attached JSON. Assign it to a variable before modifying or the changes will be ephemeral.",
+ self.__class__.__name__,
+ )
+ return json.loads(json.dumps(self._json))
+
+    def set_json(self, value: dict) -> None:
+ """Update `Resource` JSON."""
+ self._json.update(value)
+
+ json = property(get_json, set_json, doc=get_json.__doc__)
+
+ @property
+ def cpac_provenance(self) -> list:
+ """Get "CpacProvenance" of a `Resource`."""
+ return self.json["CpacProvenance"]
+
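
A hedged sketch of `Resource`'s copy-on-read JSON behavior; the identity node and provenance entry are illustrative:

```python
from nipype.interfaces import utility as util
from nipype.pipeline import engine as pe

node = pe.Node(util.IdentityInterface(fields=["data"]), name="example_source")
res = Resource(data=(node, "data"), json={"CpacProvenance": ["T1w:anat_ingress"]})

meta = res.json                      # deep copy: edits here are ephemeral
meta["Description"] = "draft"        # does not touch the stored metadata
res.json = {"Description": "kept"}   # the setter update()s the stored JSON
assert res.json["Description"] == "kept"
assert res.cpac_provenance == ["T1w:anat_ingress"]
```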
+
+class _Pool:
+ """All Resources."""
+
+ def __init__(self) -> None:
+ """Initialize a :py:class:`ResourcePool` or :py:class:`StratPool` ."""
+ self.ants_interp: str
+ self.cfg: Configuration
+ self.creds_paths: Optional[str]
+ self.data_paths: DataPaths
+ self.fsl_interp: str
+ self.func_reg: bool
+ self.fwhm: list[int]
+ self.info: dict = {}
+ self.logdir: Optional[str]
+ self.name: list[str] | str
+ self.num_ants_cores: int
+        self.num_cpus: int
+ self.part_id: str
+ self.pipe_list: list
+ self.ses_id: str
+ self.smoothing_bool: bool
+ self.smooth_opts: list[str]
+ self.regressors: dict | list
+ self.rpool: dict
+ self.run_smoothing: bool
+ self.run_zscoring: bool
+ self.unique_id: str
+ self.zscoring_bool: bool
+ self.wf: pe.Workflow
+
+ def __repr__(self) -> str:
+ """Return reproducible `_Pool` string."""
+ params = [
+ f"{param}={getattr(self, param)}"
+ for param in ["rpool", "name", "cfg", "pipe_list"]
+ if getattr(self, param, None)
+ ]
+ return f'{self.__class__.__name__}({", ".join(params)})'
+
+ def __str__(self) -> str:
+ """Return string representation of a `_Pool`."""
+ if self.name:
+ return f"{self.__class__.__name__}({self.name}): {list(self.rpool)}"
+ return f"{self.__class__.__name__}: {list(self.rpool)}"
+
+ @staticmethod
+ def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]:
+ """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one).
+
+ NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation).
+ """
+ if not isinstance(prov, list):
+ msg = (
+ "\n[!] Developer info: the CpacProvenance "
+ f"entry for {prov} has to be a list.\n"
+ )
+ raise TypeError(msg)
+ last_entry = get_last_prov_entry(prov)
+ resource = last_entry.split(":")[0]
+ return (resource, str(prov))
+
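
A hedged illustration of `generate_prov_string`; the provenance entries below are illustrative, and `ResourcePool` inherits this static method from `_Pool`:

```python
prov = [["T1w:anat_ingress"], "desc-preproc_T1w:anatomical_init"]
resource, pipe_idx = ResourcePool.generate_prov_string(prov)
# resource -> "desc-preproc_T1w"  (from the final "resource:node_name" entry)
# pipe_idx -> "[['T1w:anat_ingress'], 'desc-preproc_T1w:anatomical_init']"
```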
+ def check_rpool(self, resource: list[str] | str) -> bool:
+ """Check if a `resource` is present in the `_Pool`."""
+ if not isinstance(resource, list):
+ resource = [resource]
+ for name in resource:
+ if name in self.rpool:
+ return True
+ return False
+
+ def keys(self) -> KeysView:
+ """Return `rpool`'s keys."""
+ return self.rpool.keys()
+
+ def __contains__(self, key) -> bool:
+ """Return ``True`` if key in `_Pool`, ``False`` otherwise."""
+ return key in self.keys()
+
+ @staticmethod
+ def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]:
+ """Return the last item in the provenance list.
+
+ Each resource (i.e. "desc-cleaned_bold" AKA nuisance-regressed BOLD
+ data) has its own provenance list. the name of the resource, and
+ the node that produced it, is always the last item in the provenance
+ list, with the two separated by a colon (`:`)
+ """
+ if not len(prov):
+ return None
+ if isinstance(prov[-1], list):
+ last_item_in_list = prov[-1][-1]
+ assert isinstance(last_item_in_list, str)
+ return last_item_in_list.split(":")[0]
+ if isinstance(prov[-1], str):
+ return prov[-1].split(":")[0]
+ return None
+
+ def set_data(
+ self,
+ resource: str,
+ node: pe.Node | pe.Workflow,
+ output: str,
+ json_info: dict[str | tuple, Any],
+ pipe_idx: PIPE_IDX,
+ node_name: str,
+ fork: bool = False,
+ inject: bool = False,
+ ) -> None:
+ """Plug a :py:class:`Resource` into a `_Pool`."""
+ json_info = json_info.copy()
+ cpac_prov: LIST_OF_LIST_OF_STR = []
+ if "CpacProvenance" in json_info:
+ cpac_prov = json_info["CpacProvenance"]
+ current_prov_list = list(cpac_prov)
+ new_prov_list = list(cpac_prov) # <---- making a copy, it was already a list
+ if not inject:
+ new_prov_list.append(f"{resource}:{node_name}")
+ try:
+ _resource, new_pipe_idx = self.generate_prov_string(new_prov_list)
+ except IndexError:
+ msg = (
+ f"\n\nThe set_data() call for {resource} has no "
+ "provenance information and should not be an "
+ "injection."
+ )
+ raise IndexError(msg)
+ if not json_info:
+ json_info = {
+ "RawSources": [
+ resource # <---- this will be repopulated to the full file path at the end of the pipeline building, in gather_pipes()
+ ]
+ }
+ json_info["CpacProvenance"] = new_prov_list
+
+ if resource not in self.keys():
+ self.rpool[resource] = {}
+ elif not fork: # <--- in the event of multiple strategies/options, this will run for every option; just keep in mind
+ search = False
+ if self.get_resource_from_prov(current_prov_list) == resource:
+ # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION
+ pipe_idx = self.generate_prov_string(current_prov_list)[1]
+ if pipe_idx not in self.rpool[resource].keys():
+ search = True
+ else:
+ search = True
+ if search:
+ for idx in current_prov_list:
+ if self.get_resource_from_prov(idx) == resource:
+ if isinstance(idx, list):
+ # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION
+ pipe_idx = self.generate_prov_string(idx)[1]
+ elif isinstance(idx, str):
+ pipe_idx = idx
+ break
+ if pipe_idx in self.rpool[resource].keys():
+ # in case the resource name is now new, and not the original
+ # remove old keys so we don't end up with a new strat for every new node unit (unless we fork)
+ del self.rpool[resource][pipe_idx]
+ if new_pipe_idx not in self.rpool[resource]:
+ self.rpool[resource][new_pipe_idx] = Resource(
+ data=ResourceData(node, output), json=json_info
+ )
+ if new_pipe_idx not in self.pipe_list:
+ self.pipe_list.append(new_pipe_idx)
+
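
A hedged sketch of plugging a `Resource` into an existing pool and reading it back; `rpool` is assumed to be an already-initialized `ResourcePool`, and `anat_node` and its output name are placeholders:

```python
rpool.set_data(
    "desc-preproc_T1w",   # resource label
    anat_node,            # nipype Node whose output provides the file
    "outputspec.anat",    # output field on that Node
    {},                   # JSON metadata; CpacProvenance is appended
    "",                   # pipe_idx (recomputed from the provenance string)
    "anatomical_init",    # recorded as "desc-preproc_T1w:anatomical_init"
)
node, out = rpool.get_data("desc-preproc_T1w", quick_single=True)
```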
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR | str | list[str],
+ pipe_idx: Optional[PIPE_IDX],
+ report_fetched: bool,
+ optional: bool,
+ ) -> (
+ Optional[Resource | STRAT_DICT | dict]
+ | tuple[Optional[Resource | STRAT_DICT], Optional[str]]
+ ):
+ """Return a dictionary of strats or a single :py:class:`Resource` ."""
+ if not isinstance(resource, list):
+ resource = [resource]
+ # if a list of potential inputs are given, pick the first one found
+ for label in resource:
+ if label in self.keys():
+ _found = self.rpool[label]
+ if pipe_idx:
+ _found = _found[pipe_idx]
+ if report_fetched:
+ return _found, label
+ return _found
+ if optional:
+ if report_fetched:
+ return (None, None)
+ return None
+ msg = (
+ "\n\n[!] C-PAC says: None of the listed resources are in "
+ f"the resource pool:\n\n {resource}\n\nOptions:\n- You "
+ "can enable a node block earlier in the pipeline which "
+ "produces these resources. Check the 'outputs:' field in "
+ "a node block's documentation.\n- You can directly "
+ "provide this required data by pulling it from another "
+ "BIDS directory using 'source_outputs_dir:' in the "
+ "pipeline configuration, or by placing it directly in "
+ "your C-PAC output directory.\n- If you have done these, "
+ "and you still get this message, please let us know "
+ "through any of our support channels at: "
+ "https://fcp-indi.github.io/\n"
+ )
+ raise LookupError(msg)
+
+
+class ResourcePool(_Pool):
+ """A pool of :py:class:`Resource` s."""
+
+ def __init__(
+ self,
+ name: str = "",
+ cfg: Optional[Configuration] = None,
+ pipe_list: Optional[list] = None,
+ *,
+ data_paths: Optional[DataPaths | dict] = None,
+ part_id: Optional[str] = None,
+ pipeline_name: str = "",
+ wf: Optional[pe.Workflow] = None,
+ ) -> None:
+ """Initialize a `ResourcePool`."""
+ self.name = name
+ super().__init__()
+ if isinstance(data_paths, dict):
+ data_paths = DataPaths(data_paths=data_paths)
+ elif not data_paths:
+ data_paths = DataPaths(part_id=part_id)
+ self.data_paths = data_paths
+ # pass-through for convenient access
+ self.creds_path = self.data_paths.creds_path
+ self.part_id = self.data_paths.part_id
+ self.ses_id = self.data_paths.ses_id
+ self.unique_id = self.data_paths.unique_id
+ self.rpool: POOL_DICT = {}
+
+ if not pipe_list:
+ self.pipe_list = []
+ else:
+ self.pipe_list = pipe_list
+
+ if cfg:
+ self.cfg = cfg
+ else:
+ self.cfg = Preconfiguration("blank")
+
+ self.logdir = self._config_lookup(["pipeline_setup", "log_directory", "path"])
+ self.num_cpus = self._config_lookup(
+ ["pipeline_setup", "system_config", "max_cores_per_participant"]
+ )
+ self.num_ants_cores = self._config_lookup(
+ ["pipeline_setup", "system_config", "num_ants_threads"]
+ )
+
+ self.ants_interp = self._config_lookup(
+ [
+ "registration_workflows",
+ "functional_registration",
+ "func_registration_to_template",
+ "ANTs_pipelines",
+ "interpolation",
+ ]
+ )
+ self.fsl_interp = self._config_lookup(
+ [
+ "registration_workflows",
+ "functional_registration",
+ "func_registration_to_template",
+ "FNIRT_pipelines",
+ "interpolation",
+ ]
+ )
+ self.func_reg = self._config_lookup(
+ [
+ "registration_workflows",
+ "functional_registration",
+ "func_registration_to_template",
+ "run",
+ ]
+ )
+
+ self.run_smoothing = "smoothed" in self._config_lookup(
+ ["post_processing", "spatial_smoothing", "output"], list
+ )
+ self.smoothing_bool = self._config_lookup(
+ ["post_processing", "spatial_smoothing", "run"]
+ )
+ self.run_zscoring = "z-scored" in self._config_lookup(
+ ["post_processing", "z-scoring", "output"], list
+ )
+ self.zscoring_bool = self._config_lookup(
+ ["post_processing", "z-scoring", "run"]
+ )
+ self.fwhm = self._config_lookup(
+ ["post_processing", "spatial_smoothing", "fwhm"]
+ )
+ self.smooth_opts = self._config_lookup(
+ ["post_processing", "spatial_smoothing", "smoothing_method"]
+ )
+
+ if wf:
+ self.wf = wf
+ else:
+ self.initialize_nipype_wf(pipeline_name)
+
+ self.xfm = [
+ "alff",
+ "desc-sm_alff",
+ "desc-zstd_alff",
+ "desc-sm-zstd_alff",
+ "falff",
+ "desc-sm_falff",
+ "desc-zstd_falff",
+ "desc-sm-zstd_falff",
+ "reho",
+ "desc-sm_reho",
+ "desc-zstd_reho",
+ "desc-sm-zstd_reho",
+ ]
+ ingress_derivatives = False
+ try:
+ if self.data_paths.derivatives_dir and self._config_lookup(
+ ["pipeline_setup", "outdir_ingress", "run"], bool
+ ):
+ ingress_derivatives = True
+ except (AttributeError, KeyError, TypeError):
+ pass
+ if ingress_derivatives:
+ self.ingress_output_dir()
+ else:
+ self.ingress_raw_anat_data()
+ if data_paths.func:
+ self.ingress_raw_func_data()
+ self.ingress_pipeconfig_paths()
+
+ def back_propogate_template_name(
+ self, resource_idx: str, json_info: dict, id_string: pe.Node
+ ) -> None:
+ """Find and apply the template name from a :py:class:`Resource` 's provenance."""
+ if "template" in resource_idx and self.check_rpool("derivatives-dir"):
+ if self.check_rpool("template"):
+ node, out = self.get_data("template")
+ self.wf.connect(node, out, id_string, "template_desc")
+ elif "Template" in json_info:
+ id_string.inputs.template_desc = json_info["Template"]
+ elif (
+ "template" in resource_idx and len(json_info.get("CpacProvenance", [])) > 1
+ ):
+ for resource in source_set(json_info["CpacProvenance"]):
+ source, value = resource.split(":", 1)
+ if value.startswith("template_") and source != "FSL-AFNI-bold-ref":
+ # 'FSL-AFNI-bold-ref' is currently allowed to be in
+ # a different space, so don't use it as the space for
+ # descendents
+ try:
+ ancestors = self.rpool.get(source)
+ assert ancestors is not None
+ anscestor_json = next(iter(ancestors.items()))[1].json
+ if "Description" in anscestor_json:
+ id_string.inputs.template_desc = anscestor_json[
+ "Description"
+ ]
+ return
+ except (IndexError, KeyError):
+ pass
+ return
+
+ def gather_pipes( # noqa: PLR0915
+ self,
+ wf: pe.Workflow,
+ cfg: Configuration,
+ all_types: bool = False,
+ add_excl: Optional[list[str]] = None,
+ ) -> None:
+ """Gather pipes including naming, postproc, and expected outputs."""
+ excl: list[str] = []
+ # substring_excl: list[str] = []
+ outputs_logger = getLogger(f"{self.part_id}_expectedOutputs")
+ expected_outputs = ExpectedOutputs()
+
+ if add_excl:
+ excl += add_excl
+
+ if "nonsmoothed" not in cfg.post_processing["spatial_smoothing"]["output"]: # type: ignore [attr-defined]
+ excl += Outputs.native_nonsmooth
+ excl += Outputs.template_nonsmooth
+
+ if "raw" not in cfg.post_processing["z-scoring"]["output"]: # type: ignore [attr-defined]
+ excl += Outputs.native_raw
+ excl += Outputs.template_raw
+
+ if not cfg.pipeline_setup["output_directory"]["write_debugging_outputs"]: # type: ignore [attr-defined]
+ # substring_excl.append(['bold'])
+ excl += Outputs.debugging
+
+ for resource in self.keys():
+ if resource in excl or resource not in Outputs.any:
+ continue
+
+ # drop = False
+ # for substring_list in substring_excl:
+ # bool_list = []
+ # for substring in substring_list:
+ # if substring in resource:
+ # bool_list.append(True)
+ # else:
+ # bool_list.append(False)
+ # for item in bool_list:
+ # if not item:
+ # break
+ # else:
+ # drop = True
+ # if drop:
+ # break
+ # if drop:
+ # continue
+
+ subdir = "other"
+ if resource in Outputs.anat:
+ subdir = "anat"
+ # TODO: get acq- etc.
+ elif resource in Outputs.func:
+ subdir = "func"
+ # TODO: other stuff like acq- etc.
+
+ for pipe_idx in self.rpool[resource]:
+ unique_id = self.unique_id
+ part_id = self.part_id
+ ses_id = self.ses_id
+
+ if "ses-" not in ses_id:
+ ses_id = f"ses-{ses_id}"
+
+ out_dir = cfg.pipeline_setup["output_directory"]["path"] # type: ignore [attr-defined]
+ pipe_name = cfg.pipeline_setup["pipeline_name"] # type: ignore [attr-defined]
+ container = os.path.join(f"pipeline_{pipe_name}", part_id, ses_id)
+ filename = f"{unique_id}_{res_in_filename(self.cfg, resource)}"
+
+ out_path = os.path.join(out_dir, container, subdir, filename)
+
+ out_dct = {
+ "unique_id": unique_id,
+ "out_dir": out_dir,
+ "container": container,
+ "subdir": subdir,
+ "filename": filename,
+ "out_path": out_path,
+ }
+ self.rpool[resource][pipe_idx]["out"] = out_dct
+
+ # TODO: have to link the pipe_idx's here. and call up 'desc-preproc_T1w' from a Sources in a json and replace. here.
+ # TODO: can do the pipeline_description.json variants here too!
+
+ num_variant: Optional[int | str] = 0
+ if len(self.rpool[resource]) == 1:
+ num_variant = ""
+ unlabelled = self._get_unlabelled(resource)
+ for pipe_idx in self.rpool[resource]:
+ pipe_x = self._get_pipe_number(pipe_idx)
+ json_info = self.rpool[resource][pipe_idx]["json"]
+ out_dct = self.rpool[resource][pipe_idx]["out"]
+
+ try:
+ if unlabelled:
+ assert isinstance(num_variant, int)
+ num_variant += 1
+ except TypeError:
+ pass
+
+ try:
+ del json_info["subjson"]
+ except KeyError:
+ pass
+
+ if out_dct["subdir"] == "other" and not all_types:
+ continue
+
+ unique_id = out_dct["unique_id"]
+ resource_idx = resource
+
+ if isinstance(num_variant, int):
+ resource_idx, out_dct = name_fork(
+ resource_idx, cfg, json_info, out_dct
+ )
+ if unlabelled:
+ if "desc-" in out_dct["filename"]:
+ for key in out_dct["filename"].split("_")[::-1]:
+ # final `desc` entity
+ if key.startswith("desc-"):
+ out_dct["filename"] = out_dct["filename"].replace(
+ key, f"{key}-{num_variant}"
+ )
+ resource_idx = resource_idx.replace(
+ key, f"{key}-{num_variant}"
+ )
+ break
+ else:
+ suff = resource.split("_")[-1]
+ newdesc_suff = f"desc-{num_variant}_{suff}"
+ resource_idx = resource_idx.replace(suff, newdesc_suff)
+ id_string = pe.Node(
+ Function(
+ input_names=[
+ "cfg",
+ "unique_id",
+ "resource",
+ "scan_id",
+ "template_desc",
+ "atlas_id",
+ "fwhm",
+ "subdir",
+ "extension",
+ ],
+ output_names=["out_filename"],
+ function=create_id_string,
+ ),
+ name=f"id_string_{resource_idx}_{pipe_x}",
+ )
+ id_string.inputs.cfg = self.cfg
+ id_string.inputs.unique_id = unique_id
+ id_string.inputs.resource = resource_idx
+ id_string.inputs.subdir = out_dct["subdir"]
+
+ # grab the iterable scan ID
+ if out_dct["subdir"] == "func":
+ node, out = self.rpool["scan"]["['scan:func_ingress']"]["data"]
+ wf.connect(node, out, id_string, "scan_id")
+
+ self.back_propogate_template_name(resource_idx, json_info, id_string)
+ # grab the FWHM if smoothed
+ for tag in resource.split("_"):
+ if "desc-" in tag and "-sm" in tag:
+ fwhm_idx = str(pipe_idx).replace(f"{resource}:", "fwhm:")
+ try:
+ node, out = self.rpool["fwhm"][fwhm_idx]["data"]
+ wf.connect(node, out, id_string, "fwhm")
+ except KeyError:
+ # smoothing was not done for this resource in the
+ # engine.py smoothing
+ pass
+ break
+ atlas_suffixes = ["timeseries", "correlations", "statmap"]
+ # grab the iterable atlas ID
+ atlas_id = None
+ if not resource.endswith("desc-confounds_timeseries"):
+ if resource.split("_")[-1] in atlas_suffixes:
+ atlas_idx = str(pipe_idx).replace(resource, "atlas_name")
+ # need the single quote and the colon inside the double
+ # quotes - it's the encoded pipe_idx
+ # atlas_idx = new_idx.replace(f"'{temp_rsc}:",
+ # "'atlas_name:")
+ if atlas_idx in self.rpool["atlas_name"]:
+ node, out = self.rpool["atlas_name"][atlas_idx]["data"]
+ wf.connect(node, out, id_string, "atlas_id")
+ elif "atlas-" in resource:
+ for tag in resource.split("_"):
+ if "atlas-" in tag:
+ atlas_id = tag.replace("atlas-", "")
+ id_string.inputs.atlas_id = atlas_id
+ else:
+ WFLOGGER.warning(
+ "\n[!] No atlas ID found for %s.\n", out_dct["filename"]
+ )
+ nii_name = pe.Node(Rename(), name=f"nii_{resource_idx}_{pipe_x}")
+ nii_name.inputs.keep_ext = True
+
+ if resource in Outputs.ciftis:
+ nii_name.inputs.keep_ext = False
+ id_string.inputs.extension = Outputs.ciftis[resource]
+ else:
+ nii_name.inputs.keep_ext = True
+
+ if resource in Outputs.giftis:
+ nii_name.inputs.keep_ext = False
+ id_string.inputs.extension = f"{Outputs.giftis[resource]}.gii"
+
+ else:
+ nii_name.inputs.keep_ext = True
+
+ wf.connect(id_string, "out_filename", nii_name, "format_string")
+
+ node, out = self.rpool[resource][pipe_idx]["data"]
+ try:
+ wf.connect(node, out, nii_name, "in_file")
+ except OSError as os_error:
+ WFLOGGER.warning(os_error)
+ continue
+
+ write_json = pe.Node(
+ Function(
+ input_names=["json_data", "filename"],
+ output_names=["json_file"],
+ function=write_output_json,
+ ),
+ name=f"json_{resource_idx}_{pipe_x}",
+ )
+ write_json.inputs.json_data = json_info
+
+ wf.connect(id_string, "out_filename", write_json, "filename")
+ ds = pe.Node(DataSink(), name=f"sinker_{resource_idx}_{pipe_x}")
+ ds.inputs.parameterization = False
+ ds.inputs.base_directory = out_dct["out_dir"]
+ ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][ # type: ignore[attr-defined]
+ "s3_encryption"
+ ]
+ ds.inputs.container = out_dct["container"]
+
+ if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]: # type: ignore[attr-defined]
+ ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][ # type: ignore[attr-defined]
+ "aws_output_bucket_credentials"
+ ]
+ expected_outputs += (
+ out_dct["subdir"],
+ create_id_string(
+ self.cfg,
+ unique_id,
+ resource_idx,
+ template_desc=id_string.inputs.template_desc,
+ atlas_id=atlas_id,
+ subdir=out_dct["subdir"],
+ ),
+ )
+ wf.connect(nii_name, "out_file", ds, f'{out_dct["subdir"]}.@data')
+ wf.connect(write_json, "json_file", ds, f'{out_dct["subdir"]}.@json')
+ outputs_logger.info(expected_outputs)
+
+ @overload
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: None = None,
+ report_fetched: Literal[False] = False,
+ *,
+ optional: Literal[True],
+ ) -> Optional[STRAT_DICT]: ...
+ @overload
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: PIPE_IDX,
+ report_fetched: Literal[False] = False,
+ *,
+ optional: Literal[True],
+ ) -> Optional[Resource]: ...
+ @overload
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: None = None,
+ *,
+ report_fetched: Literal[True],
+ optional: Literal[True],
+ ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ...
+ @overload
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: PIPE_IDX,
+ report_fetched: Literal[True],
+ optional: Literal[True],
+ ) -> tuple[Optional[Resource], Optional[str]]: ...
+ @overload
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: None = None,
+ report_fetched: Literal[False] = False,
+ optional: Literal[False] = False,
+ ) -> STRAT_DICT: ...
+ @overload
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: PIPE_IDX,
+ report_fetched: Literal[False] = False,
+ optional: Literal[False] = False,
+ ) -> Resource: ...
+ @overload
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: None = None,
+ *,
+ report_fetched: Literal[True],
+ optional: bool = False,
+ ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ...
+ @overload
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: PIPE_IDX,
+ report_fetched: Literal[True],
+ optional: Literal[False] = False,
+ ) -> tuple[Resource, str]: ...
+ @overload
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: Optional[PIPE_IDX] = None,
+ report_fetched: bool = False,
+ optional: bool = False,
+ ) -> (
+ Optional[Resource | STRAT_DICT]
+ | tuple[Optional[Resource | STRAT_DICT], Optional[str]]
+ ): ...
+ def get(
+ self,
+ resource: LIST_OF_LIST_OF_STR,
+ pipe_idx: Optional[PIPE_IDX] = None,
+ report_fetched: bool = False,
+ optional: bool = False,
+ ):
+ """Return a dictionary of strats.
+
+ Inside those are dictionaries like ``{'data': (node, out), 'json': info}``.
+ """
+ return super().get(resource, pipe_idx, report_fetched, optional)
+
+ @overload
+ def get_data(
+ self,
+ resource: list[str] | str,
+ pipe_idx: None = None,
+ report_fetched: bool = False,
+ quick_single: bool = False,
+ ) -> ResourceData: ...
+ @overload
+ def get_data(
+ self,
+ resource: list[str] | str,
+ pipe_idx: PIPE_IDX,
+ report_fetched: Literal[True],
+ quick_single: Literal[False] = False,
+ ) -> tuple[ResourceData, str]: ...
+ @overload
+ def get_data(
+ self,
+ resource: list[str] | str,
+ pipe_idx: PIPE_IDX,
+ report_fetched: Literal[False] = False,
+ quick_single: bool = False,
+ ) -> ResourceData: ...
+ @overload
+ def get_data(
+ self,
+ resource: list[str] | str,
+ pipe_idx: Optional[PIPE_IDX],
+ report_fetched: bool,
+ quick_single: Literal[True],
+ ) -> ResourceData: ...
+ def get_data(
+ self,
+ resource,
+ pipe_idx=None,
+ report_fetched=False,
+ quick_single=False,
+ ):
+ """Get :py:class:`ResourceData` from `ResourcePool`."""
+ _resource = self.get(resource, pipe_idx=pipe_idx, report_fetched=report_fetched)
+ if report_fetched:
+ if pipe_idx:
+ connect, fetched = _resource
+ assert isinstance(connect, Resource) and isinstance(fetched, str)
+ return connect.data, fetched
+ if quick_single or len(resource) == 1:
+ assert isinstance(_resource, dict)
+ for value in _resource.values():
+ return value.data
+ assert isinstance(_resource, Resource)
+ return _resource.data
+
+ def get_json(self, resource: str, strat: str | tuple) -> dict:
+ """Get JSON metadata from a :py:class:`Resource` in a strategy."""
+ return self.get(resource, pipe_idx=strat).json
+
+ def get_json_info(self, resource: str, key: str) -> Any:
+        """Get a metadata value from a matching resource in any strategy."""
+ # TODO: key checks
+ for val in self.rpool[resource].values():
+ if key in val.json:
+ return val.json[key]
+ msg = f"{key} not found in any strategy for {resource} in {self}."
+ raise KeyError(msg)
+
+ @staticmethod
+ def get_raw_label(resource: str) -> str:
+ """Remove ``desc-*`` label."""
+ for tag in resource.split("_"):
+ if "desc-" in tag:
+ resource = resource.replace(f"{tag}_", "")
+ break
+ return resource
+
+ def get_strats( # noqa: PLR0912,PLR0915
+ self, resources: NODEBLOCK_INPUTS, debug: bool = False
+ ) -> dict[str | tuple, "StratPool"]:
+ """Get a dictionary of :py:class:`StratPool` s."""
+ # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS
+ # TODO: (and it doesn't have to be)
+ import itertools
+
+ linked_resources = []
+ resource_list: list[str | list[str]] = []
+ if debug:
+ verbose_logger = getLogger("CPAC.engine")
+ verbose_logger.debug("\nresources: %s", resources)
+ for resource in resources:
+ # grab the linked-input tuples
+ if isinstance(resource, tuple):
+ linked: list[str] = []
+ for label in list(resource):
+ rp_dct, fetched_resource = self.get(
+ label, report_fetched=True, optional=True
+ )
+ if not rp_dct:
+ continue
+ assert fetched_resource is not None
+ linked.append(fetched_resource)
+ resource_list += linked
+ if len(linked) < 2: # noqa: PLR2004
+ continue
+ linked_resources.append(linked)
+ else:
+ resource_list.append(resource)
+
+ total_pool = []
+ variant_pool: dict = {}
+ len_inputs = len(resource_list)
+ if debug:
+ verbose_logger = getLogger("CPAC.engine")
+ verbose_logger.debug("linked_resources: %s", linked_resources)
+ verbose_logger.debug("resource_list: %s", resource_list)
+ for resource in resource_list:
+ (
+ rp_dct, # <---- rp_dct has the strats/pipe_idxs as the keys on first level, then 'data' and 'json' on each strat level underneath
+ fetched_resource,
+ ) = self.get(
+ resource,
+ report_fetched=True,
+ optional=True, # oh, and we make the resource fetching in get_strats optional so we can have optional inputs, but they won't be optional in the node block unless we want them to be
+ )
+ if not rp_dct:
+ len_inputs -= 1
+ continue
+ assert isinstance(rp_dct, dict) and fetched_resource is not None
+ sub_pool = []
+ if debug:
+ verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct))
+ for strat in rp_dct.keys():
+ json_info = self.get_json(fetched_resource, strat)
+ cpac_prov = json_info["CpacProvenance"]
+ sub_pool.append(cpac_prov)
+ if fetched_resource not in variant_pool:
+ variant_pool[fetched_resource] = []
+ if "CpacVariant" in json_info:
+ for key, val in json_info["CpacVariant"].items():
+ if val not in variant_pool[fetched_resource]:
+ variant_pool[fetched_resource] += val
+ variant_pool[fetched_resource].append(f"NO-{val[0]}")
+
+ if debug:
+ verbose_logger = getLogger("CPAC.engine")
+ verbose_logger.debug("%s sub_pool: %s\n", resource, sub_pool)
+ total_pool.append(sub_pool)
+
+ if not total_pool:
+ raise LookupError(
+ "\n\n[!] C-PAC says: None of the listed "
+ "resources in the node block being connected "
+ "exist in the resource pool.\n\nResources:\n"
+ "%s\n\n" % resource_list
+ )
+
+ # TODO: right now total_pool is:
+ # TODO: [[[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment], [T1w:anat_ingress,desc-preproc_T1w:anatomical_init]],
+ # TODO: [[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment, desc-brain_mask:brain_mask_afni], [T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-brain_mask:brain_mask_afni]]]
+
+ # TODO: and the code below thinks total_pool is a list of lists, like [[pipe_idx, pipe_idx], [pipe_idx, pipe_idx, pipe_idx], etc.]
+ # TODO: and the actual resource is encoded in the tag: of the last item, every time!
+ # keying the strategies to the resources, inverting it
+ if len_inputs > 1:
+ strats = itertools.product(*total_pool)
+
+ # we now currently have "strats", the combined permutations of all the strategies, as a list of tuples, each tuple combining one version of input each, being one of the permutations.
+ # OF ALL THE DIFFERENT INPUTS. and they are tagged by their fetched inputs with {name}:{strat}.
+ # so, each tuple has ONE STRAT FOR EACH INPUT, so if there are three inputs, each tuple will have 3 items.
+ new_strats: dict[str | tuple, StratPool] = {}
+
+ # get rid of duplicates - TODO: refactor .product
+ strat_str_list = []
+ strat_list_list = []
+ for strat_tuple in strats:
+ strat_list = list(deepcopy(strat_tuple))
+ strat_str = str(strat_list)
+ if strat_str not in strat_str_list:
+ strat_str_list.append(strat_str)
+ strat_list_list.append(strat_list)
+
+ if debug:
+ verbose_logger = getLogger("CPAC.engine")
+ verbose_logger.debug("len(strat_list_list): %s\n", len(strat_list_list))
+ for strat_list in strat_list_list:
+ json_dct = {}
+ for strat in strat_list:
+ # strat is a prov list for a single resource/input
+ prov_resource, strat_idx = self.generate_prov_string(strat)
+ strat_json = self.get_json(prov_resource, strat=strat_idx)
+ json_dct[prov_resource] = strat_json
+
+ drop = False
+ if linked_resources:
+ for linked in linked_resources: # <--- 'linked' is each tuple
+ if drop:
+ break
+ for xlabel in linked:
+ if drop or xlabel is None:
+ break
+ xjson = json.loads(json.dumps(json_dct[xlabel]))
+ for ylabel in linked:
+ if xlabel == ylabel or ylabel is None:
+ continue
+ yjson = json.loads(json.dumps(json_dct[ylabel]))
+
+ if "CpacVariant" not in xjson:
+ xjson["CpacVariant"] = {}
+ if "CpacVariant" not in yjson:
+ yjson["CpacVariant"] = {}
+
+ current_strat = []
+ for val in xjson["CpacVariant"].values():
+ if isinstance(val, list):
+ current_strat.append(val[0])
+ else:
+ current_strat.append(val)
+ current_spread = list(set(variant_pool[xlabel]))
+ for spread_label in current_spread:
+ if "NO-" in spread_label:
+ continue
+ if spread_label not in current_strat:
+ current_strat.append(f"NO-{spread_label}")
+
+ other_strat = []
+ for val in yjson["CpacVariant"].values():
+ if isinstance(val, list):
+ other_strat.append(val[0])
+ else:
+ other_strat.append(val)
+ other_spread = list(set(variant_pool[ylabel]))
+ for spread_label in other_spread:
+ if "NO-" in spread_label:
+ continue
+ if spread_label not in other_strat:
+ other_strat.append(f"NO-{spread_label}")
+
+ for variant in current_spread:
+ in_current_strat = False
+ in_other_strat = False
+ in_other_spread = False
+
+ if variant is None:
+ in_current_strat = True
+ if None in other_spread:
+ in_other_strat = True
+ if variant in current_strat:
+ in_current_strat = True
+ if variant in other_strat:
+ in_other_strat = True
+ if variant in other_spread:
+ in_other_spread = True
+
+ if not in_other_strat:
+ if in_other_spread:
+ if in_current_strat:
+ drop = True
+ break
+
+ if in_other_strat:
+ if in_other_spread:
+ if not in_current_strat:
+ drop = True
+ break
+ if drop:
+ break
+ if drop:
+ continue
+
+ # make the merged strat label from the multiple inputs
+ # strat_list is actually the merged CpacProvenance lists
+ pipe_idx = str(strat_list)
+ new_strats[pipe_idx] = StratPool(name=pipe_idx, cfg=self.cfg)
+ # new_strats is A DICTIONARY OF StratPool OBJECTS!
+ new_strats[pipe_idx].json = {"CpacProvenance": strat_list}
+
+ # now just invert resource:strat to strat:resource for each resource:strat
+ for cpac_prov in strat_list:
+ resource, strat = self.generate_prov_string(cpac_prov)
+ strat_resource = self.rpool[resource][strat]
+ # remember, `strat_resource` is a Resource.
+ new_strats[pipe_idx].rpool[resource] = strat_resource
+ # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together.
+ self.pipe_list.append(pipe_idx)
+ if "CpacVariant" in strat_resource["json"]:
+ if "CpacVariant" not in new_strats[pipe_idx]._json:
+ new_strats[pipe_idx]._json["CpacVariant"] = {}
+                    for younger_resource, variant_list in strat_resource[
+                        "json"
+                    ]["CpacVariant"].items():
+ if (
+ younger_resource
+ not in new_strats[pipe_idx]._json["CpacVariant"]
+ ):
+ new_strats[pipe_idx]._json["CpacVariant"][
+ younger_resource
+ ] = variant_list
+ # preserve each input's JSON info also
+ new_strats[pipe_idx].preserve_json_info(resource, strat_resource)
+ else:
+ new_strats = {}
+ for resource_strat_list in total_pool:
+ # total_pool will have only one list of strats, for the one input
+ for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs
+ resource, pipe_idx = self.generate_prov_string(cpac_prov)
+ strat_resource = self.rpool[resource][pipe_idx]
+ # remember, `strat_resource` is a Resource.
+ new_strats[pipe_idx] = StratPool(
+ rpool={resource: strat_resource}, name=pipe_idx, cfg=self.cfg
+ ) # <----- again, new_strats is A DICTIONARY OF StratPool OBJECTS!
+ new_strats[pipe_idx].json = strat_resource.json
+ new_strats[pipe_idx].json["subjson"] = {}
+ new_strats[pipe_idx].json["CpacProvenance"] = cpac_prov
+ # preserve each input's JSON info also
+ new_strats[pipe_idx].preserve_json_info(resource, strat_resource)
+ return new_strats
+
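
A hedged sketch of how a node block's inputs become per-strategy pools via `get_strats`; the resource labels and the pre-existing `rpool` are illustrative:

```python
strat_pools = rpool.get_strats(["desc-preproc_T1w", "desc-brain_mask"])
for pipe_idx, strat_pool in strat_pools.items():
    # pipe_idx is the merged CpacProvenance string for this combination of inputs
    node, out = strat_pool.get_data("desc-preproc_T1w")
```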
+ def initialize_nipype_wf(self, name: str = "") -> None:
+ """Initialize a new nipype :py:class:`~nipype.pipeline.engine.Workflow` ."""
+ if name:
+ name = f"_{name}"
+ workflow_name = f"cpac{name}_{self.unique_id}"
+ self.wf = pe.Workflow(name=workflow_name)
+ self.wf.base_dir = self.cfg.pipeline_setup["working_directory"]["path"] # type: ignore[attr-defined]
+ self.wf.config["execution"] = {
+ "hash_method": "timestamp",
+ "crashdump_dir": os.path.abspath(
+ self.cfg.pipeline_setup["log_directory"]["path"] # type: ignore[attr-defined]
+ ),
+ }
+
+ def ingress_freesurfer(self) -> None:
+ """Ingress FreeSurfer data."""
+ try:
+ fs_path = os.path.join(
+ self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined]
+ self.part_id,
+ )
+ except KeyError:
+ WFLOGGER.warning("No FreeSurfer data present.")
+ return
+
+ # fs_path = os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id)
+ if not os.path.exists(fs_path):
+ if "sub" in self.part_id:
+ fs_path = os.path.join(
+ self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined]
+ self.part_id.replace("sub-", ""),
+ )
+ else:
+ fs_path = os.path.join(
+ self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined]
+ ("sub-" + self.part_id),
+ )
+
+ # patch for flo-specific data
+ if not os.path.exists(fs_path):
+ subj_ses = f"{self.part_id}-{self.ses_id}"
+ fs_path = os.path.join(
+ self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined]
+ subj_ses,
+ )
+ if not os.path.exists(fs_path):
+ WFLOGGER.info(
+ "No FreeSurfer data found for subject %s", self.part_id
+ )
+ return
+
+ # Check for double nested subj names
+ if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))):
+ fs_path = os.path.join(fs_path, self.part_id)
+
+ fs_ingress = create_general_datasource("gather_freesurfer_dir")
+ fs_ingress.inputs.inputnode.set(
+ unique_id=self.unique_id,
+ data=fs_path,
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined]
+ )
+ self.set_data(
+ "freesurfer-subject-dir",
+ fs_ingress,
+ "outputspec.data",
+ {},
+ "",
+ "freesurfer_config_ingress",
+ )
+
+ recon_outs = {
+ "pipeline-fs_raw-average": "mri/rawavg.mgz",
+ "pipeline-fs_subcortical-seg": "mri/aseg.mgz",
+ "pipeline-fs_brainmask": "mri/brainmask.mgz",
+ "pipeline-fs_wmparc": "mri/wmparc.mgz",
+ "pipeline-fs_T1": "mri/T1.mgz",
+ "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv",
+ "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv",
+ "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial",
+ "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial",
+ "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm",
+ "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm",
+ "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere",
+ "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere",
+ "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc",
+ "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc",
+ "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness",
+ "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness",
+ "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume",
+ "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume",
+ "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white",
+ "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white",
+ "pipeline-fs_xfm": "mri/transforms/talairach.lta",
+ }
+
+ for key, outfile in recon_outs.items():
+ fullpath = os.path.join(fs_path, outfile)
+ if os.path.exists(fullpath):
+ fs_ingress = create_general_datasource(f"gather_fs_{key}_dir")
+ fs_ingress.inputs.inputnode.set(
+ unique_id=self.unique_id,
+ data=fullpath,
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined]
+ )
+ self.set_data(
+ key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress"
+ )
+ else:
+ WFLOGGER.warning("\n[!] Path does not exist for %s.\n", fullpath)
+
+ return
+
+ def ingress_output_dir(self) -> None:
+ """Ingress an output directory into a `ResourcePool`."""
+ dir_path = self.data_paths.derivatives_dir
+ assert dir_path is not None
+ WFLOGGER.info("\nPulling outputs from %s.\n", dir_path)
+
+ anat = os.path.join(dir_path, "anat")
+ func = os.path.join(dir_path, "func")
+
+ outdir_anat: list[str] = []
+ outdir_func: list[str] = []
+ func_paths: dict = {}
+ func_dict: dict = {}
+ func_key = ""
+
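+ # Collect anatomical and functional derivative files with recognized extensions.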
+ for subdir in [anat, func]:
+ if os.path.isdir(subdir):
+ for filename in os.listdir(subdir):
+ for ext in EXTS:
+ if ext in filename:
+ if subdir == anat:
+ outdir_anat.append(os.path.join(subdir, filename))
+ else:
+ outdir_func.append(os.path.join(subdir, filename))
+
+ # Add derivatives directory to rpool
+ ingress = create_general_datasource("gather_derivatives_dir")
+ ingress.inputs.inputnode.set(
+ unique_id=self.unique_id,
+ data=dir_path,
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined]
+ )
+ self.set_data(
+ "derivatives-dir",
+ ingress,
+ "outputspec.data",
+ {},
+ "",
+ "outdir_config_ingress",
+ )
+
+ for subdirs in [outdir_anat, outdir_func]:
+ for filepath in subdirs:
+ filename = str(filepath)
+ for ext in EXTS:
+ filename = filename.split("/")[-1].replace(ext, "")
+
+ data_label = filename.split(self.unique_id)[1].lstrip("_")
+
+ if len(filename) == len(data_label):
+ msg = (
+ "\n\n[!] Possibly wrong participant or "
+ "session in this directory?\n\n"
+ f"Filepath: {filepath}\n\n"
+ )
+ raise Exception(msg)
+
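+ # Split BIDS entity prefixes (task-, run-, acq-, rec-) out of the data label into a separate tag string.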
+ bidstag = ""
+ for tag in data_label.split("_"):
+ for prefix in ["task-", "run-", "acq-", "rec"]:
+ if tag.startswith(prefix):
+ bidstag += f"{tag}_"
+ data_label = data_label.replace(f"{tag}_", "")
+ data_label, json = strip_template(data_label)
+
+ json_info, pipe_idx, node_name, data_label = self.json_outdir_ingress(
+ filepath, data_label, json
+ )
+
+ if (
+ "template" in data_label
+ and not json_info["Template"]
+ == self.cfg.pipeline_setup["outdir_ingress"]["Template"] # type: ignore[attr-defined]
+ ):
+ continue
+ # Rename confounds to avoid confusion in nuisance regression
+ if data_label.endswith("desc-confounds_timeseries"):
+ data_label = "pipeline-ingress_desc-confounds_timeseries"
+
+ if len(bidstag) > 1:
+ # Remove tail symbol
+ bidstag = bidstag[:-1]
+ if bidstag.startswith("task-"):
+ bidstag = bidstag.replace("task-", "")
+
+ # Rename bold mask for CPAC naming convention
+ # and to avoid collision with anat brain mask
+ if data_label.endswith("desc-brain_mask") and filepath in outdir_func:
+ data_label = data_label.replace("brain_mask", "bold_mask")
+
+ try:
+ pipe_x = self._get_pipe_number(pipe_idx)
+ except ValueError:
+ pipe_x = len(self.pipe_list)
+ if filepath in outdir_anat:
+ ingress = create_general_datasource(
+ f"gather_anat_outdir_{data_label!s}_{pipe_x}"
+ )
+ ingress.inputs.inputnode.set(
+ unique_id=self.unique_id,
+ data=filepath,
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined]
+ )
+ self.set_data(
+ data_label,
+ ingress,
+ "outputspec.data",
+ json_info,
+ pipe_idx,
+ node_name=f"outdir_{data_label}_ingress",
+ inject=True,
+ )
+ else:
+ if data_label.endswith("desc-preproc_bold"):
+ func_key = data_label
+ func_dict[bidstag] = {}
+ func_dict[bidstag]["scan"] = str(filepath)
+ func_dict[bidstag]["scan_parameters"] = json_info
+ func_dict[bidstag]["pipe_idx"] = pipe_idx
+ if data_label.endswith("desc-brain_mask"):
+ data_label = data_label.replace("brain_mask", "bold_mask")
+ try:
+ func_paths[data_label].append(filepath)
+ except (AttributeError, KeyError, TypeError):
+ func_paths[data_label] = []
+ func_paths[data_label].append(filepath)
+
+ if func_dict:
+ self.func_outdir_ingress(
+ func_dict,
+ func_key,
+ func_paths,
+ )
+
+ if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined]
+ self.ingress_freesurfer()
+
+ def ingress_func_metadata(
+ self,
+ num_strat=None,
+ ) -> tuple[bool, bool, list[str]]:
+ """Ingress metadata for functional scans."""
+ name_suffix = ""
+ for suffix_part in (self.unique_id, num_strat):
+ if suffix_part is not None:
+ name_suffix += f"_{suffix_part}"
+ # Grab field maps
+ diff = False
+ blip = False
+ fmap_rp_list = []
+ fmap_TE_list = []
+ if self.data_paths.fmap:
+ second = False
+ for orig_key in self.data_paths.fmap:
+ gather_fmap = create_fmap_datasource(
+ self.data_paths.fmap, f"fmap_gather_{orig_key}_{self.part_id}"
+ )
+ gather_fmap.inputs.inputnode.set(
+ subject=self.part_id,
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined]
+ )
+ gather_fmap.inputs.inputnode.scan = orig_key
+
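+ # Differentiate up to two EPI field maps by renaming their keys to epi-1 and epi-2.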
+ key = orig_key
+ if "epi" in key and not second:
+ key = "epi-1"
+ second = True
+ elif "epi" in key and second:
+ key = "epi-2"
+
+ self.set_data(
+ key, gather_fmap, "outputspec.rest", {}, "", "fmap_ingress"
+ )
+ self.set_data(
+ f"{key}-scan-params",
+ gather_fmap,
+ "outputspec.scan_params",
+ {},
+ "",
+ "fmap_params_ingress",
+ )
+
+ fmap_rp_list.append(key)
+
+ get_fmap_metadata_imports = ["import json"]
+ get_fmap_metadata = pe.Node(
+ Function(
+ input_names=["data_config_scan_params"],
+ output_names=[
+ "dwell_time",
+ "pe_direction",
+ "total_readout",
+ "echo_time",
+ "echo_time_one",
+ "echo_time_two",
+ ],
+ function=get_fmap_phasediff_metadata,
+ imports=get_fmap_metadata_imports,
+ ),
+ name=f"{key}_get_metadata{name_suffix}",
+ )
+
+ self.wf.connect(
+ gather_fmap,
+ "outputspec.scan_params",
+ get_fmap_metadata,
+ "data_config_scan_params",
+ )
+
+ if "phase" in key:
+ # leave it open to all three options, in case there is a
+ # phasediff image with either a single EchoTime field (which
+ # usually matches one of the magnitude EchoTimes), OR
+ # a phasediff with an EchoTime1 and EchoTime2
+
+ # at least one of these rpool keys will have a None value,
+ # which will be sorted out in gather_echo_times below
+ self.set_data(
+ f"{key}-TE",
+ get_fmap_metadata,
+ "echo_time",
+ {},
+ "",
+ "fmap_TE_ingress",
+ )
+ fmap_TE_list.append(f"{key}-TE")
+
+ self.set_data(
+ f"{key}-TE1",
+ get_fmap_metadata,
+ "echo_time_one",
+ {},
+ "",
+ "fmap_TE1_ingress",
+ )
+ fmap_TE_list.append(f"{key}-TE1")
+
+ self.set_data(
+ f"{key}-TE2",
+ get_fmap_metadata,
+ "echo_time_two",
+ {},
+ "",
+ "fmap_TE2_ingress",
+ )
+ fmap_TE_list.append(f"{key}-TE2")
+
+ elif "magnitude" in key:
+ self.set_data(
+ f"{key}-TE",
+ get_fmap_metadata,
+ "echo_time",
+ {},
+ "",
+ "fmap_TE_ingress",
+ )
+ fmap_TE_list.append(f"{key}-TE")
+
+ self.set_data(
+ f"{key}-dwell",
+ get_fmap_metadata,
+ "dwell_time",
+ {},
+ "",
+ "fmap_dwell_ingress",
+ )
+ self.set_data(
+ f"{key}-pedir",
+ get_fmap_metadata,
+ "pe_direction",
+ {},
+ "",
+ "fmap_pedir_ingress",
+ )
+ self.set_data(
+ f"{key}-total-readout",
+ get_fmap_metadata,
+ "total_readout",
+ {},
+ "",
+ "fmap_readout_ingress",
+ )
+
+ if "phase" in key or "mag" in key:
+ diff = True
+
+ if re.match("epi_[AP]{2}", orig_key):
+ blip = True
+
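+ # For phase-difference field maps, gather the echo times and compute delta TE and the echo-spacing asymmetry ratio.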
+ if diff:
+ calc_delta_ratio = pe.Node(
+ Function(
+ input_names=["effective_echo_spacing", "echo_times"],
+ output_names=["deltaTE", "ees_asym_ratio"],
+ function=calc_delta_te_and_asym_ratio,
+ imports=["from typing import Optional"],
+ ),
+ name=f"diff_distcor_calc_delta{name_suffix}",
+ )
+
+ gather_echoes = pe.Node(
+ Function(
+ input_names=[
+ "echotime_1",
+ "echotime_2",
+ "echotime_3",
+ "echotime_4",
+ ],
+ output_names=["echotime_list"],
+ function=gather_echo_times,
+ ),
+ name="fugue_gather_echo_times",
+ )
+
+ for idx, fmap_file in enumerate(fmap_TE_list, start=1):
+ try:
+ node, out_file = self.get_data(
+ fmap_file, f"['{fmap_file}:fmap_TE_ingress']"
+ )
+ self.wf.connect(
+ node, out_file, gather_echoes, f"echotime_{idx}"
+ )
+ except KeyError:
+ pass
+
+ self.wf.connect(
+ gather_echoes, "echotime_list", calc_delta_ratio, "echo_times"
+ )
+
+ # Add in nodes to get parameters from configuration file
+ # a node which checks if scan_parameters are present for each scan
+ scan_params = pe.Node(
+ Function(
+ input_names=[
+ "data_config_scan_params",
+ "subject_id",
+ "scan",
+ "pipeconfig_tr",
+ "pipeconfig_tpattern",
+ "pipeconfig_start_indx",
+ "pipeconfig_stop_indx",
+ ],
+ output_names=[
+ "tr",
+ "tpattern",
+ "template",
+ "ref_slice",
+ "start_indx",
+ "stop_indx",
+ "pe_direction",
+ "effective_echo_spacing",
+ ],
+ function=get_scan_params,
+ imports=["from CPAC.utils.utils import check, try_fetch_parameter"],
+ ),
+ name=f"bold_scan_params_{self.part_id}{name_suffix}",
+ )
+ scan_params.inputs.subject_id = self.part_id
+ scan_params.inputs.set(
+ pipeconfig_start_indx=self.cfg.functional_preproc["truncation"]["start_tr"], # type: ignore[attr-defined]
+ pipeconfig_stop_indx=self.cfg.functional_preproc["truncation"]["stop_tr"], # type: ignore[attr-defined]
+ )
+
+ node, out = self.get_data("scan", "['scan:func_ingress']")
+ self.wf.connect(node, out, scan_params, "scan")
+
+ # Workaround for extracting metadata with ingress
+ if self.check_rpool("derivatives-dir"):
+ selectrest_json = pe.Node(
+ Function(
+ input_names=["scan", "rest_dict", "resource"],
+ output_names=["file_path"],
+ function=get_rest,
+ as_module=True,
+ ),
+ name="selectrest_json",
+ )
+ selectrest_json.inputs.rest_dict = self.data_paths.as_dict()
+ selectrest_json.inputs.resource = "scan_parameters"
+ self.wf.connect(node, out, selectrest_json, "scan")
+ self.wf.connect(
+ selectrest_json, "file_path", scan_params, "data_config_scan_params"
+ )
+
+ else:
+ # wire in the scan parameter workflow
+ node, out = self.get_data(
+ "scan-params", "['scan-params:scan_params_ingress']"
+ )
+ self.wf.connect(node, out, scan_params, "data_config_scan_params")
+
+ self.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress")
+ self.set_data(
+ "tpattern", scan_params, "tpattern", {}, "", "func_metadata_ingress"
+ )
+ self.set_data(
+ "template", scan_params, "template", {}, "", "func_metadata_ingress"
+ )
+ self.set_data(
+ "start-tr", scan_params, "start_indx", {}, "", "func_metadata_ingress"
+ )
+ self.set_data(
+ "stop-tr", scan_params, "stop_indx", {}, "", "func_metadata_ingress"
+ )
+ self.set_data(
+ "pe-direction", scan_params, "pe_direction", {}, "", "func_metadata_ingress"
+ )
+
+ if diff:
+ # Connect EffectiveEchoSpacing from functional metadata
+ self.set_data(
+ "effectiveEchoSpacing",
+ scan_params,
+ "effective_echo_spacing",
+ {},
+ "",
+ "func_metadata_ingress",
+ )
+ node, out_file = self.get_data(
+ "effectiveEchoSpacing", "['effectiveEchoSpacing:func_metadata_ingress']"
+ )
+ self.wf.connect(node, out_file, calc_delta_ratio, "effective_echo_spacing")
+ self.set_data(
+ "deltaTE", calc_delta_ratio, "deltaTE", {}, "", "deltaTE_ingress"
+ )
+ self.set_data(
+ "ees-asym-ratio",
+ calc_delta_ratio,
+ "ees_asym_ratio",
+ {},
+ "",
+ "ees_asym_ratio_ingress",
+ )
+
+ return diff, blip, fmap_rp_list
+
+ def ingress_pipeconfig_paths(self):
+ """Ingress config file paths."""
+ # TODO: may want to change the resource keys for each to include one level up in the YAML as well
+
+ import pandas as pd
+ import pkg_resources as p
+
+ template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv")
+ template_df = pd.read_csv(template_csv, keep_default_na=False)
+
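+ # Each row of cpac_templates.csv maps a resource key to a pipeline-config entry and an optional resolution entry.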
+ for row in template_df.itertuples():
+ key = row.Key
+ val = row.Pipeline_Config_Entry
+ val = self.cfg.get_nested(self.cfg, [x.lstrip() for x in val.split(",")])
+ resolution = row.Intended_Resolution_Config_Entry
+ desc = row.Description
+
+ if not val:
+ continue
+
+ if resolution:
+ res_keys = [x.lstrip() for x in resolution.split(",")]
+ tag = res_keys[-1]
+ json_info = {}
+
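+ # Substitute configuration variables ($FSLDIR, $priors_path, resolutions) in the template path.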
+ if "$FSLDIR" in val:
+ val = val.replace(
+ "$FSLDIR", self.cfg.pipeline_setup["system_config"]["FSLDIR"]
+ )
+ if "$priors_path" in val:
+ priors_path = (
+ self.cfg.segmentation["tissue_segmentation"]["FSL-FAST"][
+ "use_priors"
+ ]["priors_path"]
+ or ""
+ )
+ if "$FSLDIR" in priors_path:
+ priors_path = priors_path.replace(
+ "$FSLDIR", self.cfg.pipeline_setup["system_config"]["FSLDIR"]
+ )
+ val = val.replace("$priors_path", priors_path)
+ if "${resolution_for_anat}" in val:
+ val = val.replace(
+ "${resolution_for_anat}",
+ self.cfg.registration_workflows["anatomical_registration"][
+ "resolution_for_anat"
+ ],
+ )
+ if "${func_resolution}" in val:
+ val = val.replace(
+ "${func_resolution}",
+ self.cfg.registration_workflows["functional_registration"][
+ "func_registration_to_template"
+ ]["output_resolution"][tag],
+ )
+
+ if desc:
+ template_name, _template_desc = lookup_identifier(val)
+ if template_name:
+ desc = f"{template_name} - {desc}"
+ json_info["Description"] = f"{desc} - {val}"
+ if resolution:
+ resolution = self.cfg.get_nested(self.cfg, res_keys)
+ json_info["Resolution"] = resolution
+
+ resampled_template = pe.Node(
+ Function(
+ input_names=["resolution", "template", "template_name", "tag"],
+ output_names=["resampled_template"],
+ function=resolve_resolution,
+ as_module=True,
+ ),
+ name="resampled_" + key,
+ )
+
+ resampled_template.inputs.resolution = resolution
+ resampled_template.inputs.template = val
+ resampled_template.inputs.template_name = key
+ resampled_template.inputs.tag = tag
+
+ # This set_data call is set up a little differently because we are
+ # injecting and also over-writing already-existing entries. The
+ # alternative would have been to ingress into the resampled_template
+ # node from the already-existing entries, but that is not done here.
+ # Note: pipe_idx (after the blank json {}) should be the previous
+ # strategy you want deleted -- because this is not connected the
+ # regular way, it has to be handled manually.
+ self.set_data(
+ key,
+ resampled_template,
+ "resampled_template",
+ json_info,
+ "",
+ "template_resample",
+ )
+
+ elif val:
+ config_ingress = create_general_datasource(f"gather_{key}")
+ config_ingress.inputs.inputnode.set(
+ unique_id=self.unique_id,
+ data=val,
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"],
+ )
+ self.set_data(
+ key,
+ config_ingress,
+ "outputspec.data",
+ json_info,
+ "",
+ f"{key}_config_ingress",
+ )
+
+ def create_func_datasource(
+ self, rest_dict: dict, wf_name="func_datasource"
+ ) -> pe.Workflow:
+ """Create a :py:class:`~nipype.pipeline.engine.Workflow` to gather timeseries data.
+
+ Return the functional timeseries-related file paths for each series/scan from the
+ dictionary of functional files described in the data configuration (sublist) YAML
+ file.
+
+ Scan input (from inputnode) is an iterable.
+ """
+ wf = pe.Workflow(name=wf_name)
+
+ inputnode = pe.Node(
+ util.IdentityInterface(
+ fields=["subject", "scan", "creds_path", "dl_dir"],
+ mandatory_inputs=True,
+ ),
+ name="inputnode",
+ )
+
+ outputnode = pe.Node(
+ util.IdentityInterface(
+ fields=[
+ "subject",
+ "rest",
+ "scan",
+ "scan_params",
+ "phase_diff",
+ "magnitude",
+ ]
+ ),
+ name="outputspec",
+ )
+
+ # Keep this check for now because of the big change in the data
+ # configuration format. (It is not necessary with ingress, whose
+ # format does not comply.)
+ if not self.check_rpool("derivatives-dir"):
+ check_scan = pe.Node(
+ Function(
+ input_names=["func_scan_dct", "scan"],
+ output_names=[],
+ function=check_func_scan,
+ as_module=True,
+ ),
+ name="check_func_scan",
+ )
+
+ check_scan.inputs.func_scan_dct = rest_dict
+ wf.connect(inputnode, "scan", check_scan, "scan")
+
+ # get the functional scan itself
+ selectrest = pe.Node(
+ Function(
+ input_names=["scan", "rest_dict", "resource"],
+ output_names=["file_path"],
+ function=get_rest,
+ as_module=True,
+ ),
+ name="selectrest",
+ )
+ selectrest.inputs.rest_dict = rest_dict
+ selectrest.inputs.resource = "scan"
+ wf.connect(inputnode, "scan", selectrest, "scan")
+
+ # Check whether the scan is on an Amazon S3 bucket; download it if so,
+ # otherwise just return the local file path.
+ check_s3_node = pe.Node(
+ Function(
+ input_names=["file_path", "creds_path", "dl_dir", "img_type"],
+ output_names=["local_path"],
+ function=check_for_s3,
+ as_module=True,
+ ),
+ name="check_for_s3",
+ )
+
+ wf.connect(selectrest, "file_path", check_s3_node, "file_path")
+ wf.connect(inputnode, "creds_path", check_s3_node, "creds_path")
+ wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir")
+ check_s3_node.inputs.img_type = "func"
+
+ wf.connect(inputnode, "subject", outputnode, "subject")
+ wf.connect(check_s3_node, "local_path", outputnode, "rest")
+ wf.connect(inputnode, "scan", outputnode, "scan")
+
+ # scan parameters CSV
+ select_scan_params = pe.Node(
+ Function(
+ input_names=["scan", "rest_dict", "resource"],
+ output_names=["file_path"],
+ function=get_rest,
+ as_module=True,
+ ),
+ name="select_scan_params",
+ )
+ select_scan_params.inputs.rest_dict = rest_dict
+ select_scan_params.inputs.resource = "scan_parameters"
+ wf.connect(inputnode, "scan", select_scan_params, "scan")
+
+ # if the scan parameters file is on AWS S3, download it
+ s3_scan_params = pe.Node(
+ Function(
+ input_names=["file_path", "creds_path", "dl_dir", "img_type"],
+ output_names=["local_path"],
+ function=check_for_s3,
+ as_module=True,
+ ),
+ name="s3_scan_params",
+ )
+
+ wf.connect(select_scan_params, "file_path", s3_scan_params, "file_path")
+ wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path")
+ wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir")
+ wf.connect(s3_scan_params, "local_path", outputnode, "scan_params")
+
+ return wf
+
+ def ingress_raw_func_data(self):
+ """Ingress raw functional data."""
+ func_paths_dct = self.data_paths.func
+
+ func_wf = self.create_func_datasource(
+ func_paths_dct, f"func_ingress_{self.part_id}_{self.ses_id}"
+ )
+ func_wf.inputs.inputnode.set(
+ subject=self.part_id,
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"],
+ )
+ func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys()))
+
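+ # Expose the ingressed subject, BOLD series, scan name, and scan parameters as resources.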
+ self.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress")
+ self.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress")
+ self.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress")
+ self.set_data(
+ "scan-params",
+ func_wf,
+ "outputspec.scan_params",
+ {},
+ "",
+ "scan_params_ingress",
+ )
+
+ # TODO: CHECK FOR PARAMETERS
+
+ diff, blip, fmap_rp_list = self.ingress_func_metadata()
+
+ # Memoize the list of local functional scans
+ # TODO: handle S3 files; they are skipped for now
+
+ local_func_scans = [
+ func_paths_dct[scan]["scan"]
+ for scan in func_paths_dct.keys()
+ if not func_paths_dct[scan]["scan"].startswith("s3://")
+ ]
+ if local_func_scans:
+ # pylint: disable=protected-access
+ self.wf._local_func_scans = local_func_scans
+ if self.cfg.pipeline_setup["Debugging"]["verbose"]:
+ verbose_logger = getLogger("CPAC.engine")
+ verbose_logger.debug("local_func_scans: %s", local_func_scans)
+ del local_func_scans
+
+ return diff, blip, fmap_rp_list
+
+ def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> None:
+ """Ingress a functional output directory."""
+ pipe_x = len(self.pipe_list)
+ ingress = self.create_func_datasource(
+ func_dict, f"gather_func_outdir_{key}_{pipe_x}"
+ )
+ ingress.inputs.inputnode.set(
+ subject=self.unique_id,
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined]
+ )
+ self.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress")
+ ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys()))
+ self.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress")
+
+ self.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress")
+ self.set_data(
+ "scan-params",
+ ingress,
+ "outputspec.scan_params",
+ {},
+ "",
+ "scan_params_ingress",
+ )
+ self.ingress_func_metadata()
+
+ # Save the parsed BIDS tag & filepath so they can be reused when connecting scans below
+ mask_paths_key = (
+ "desc-bold_mask"
+ if "desc-bold_mask" in func_paths
+ else "space-template_desc-bold_mask"
+ )
+ ts_paths_key = "pipeline-ingress_desc-confounds_timeseries"
+
+ # Connect func data with appropriate scan name
+ iterables = pe.Node(
+ Function(
+ input_names=["scan", "mask_paths", "ts_paths"],
+ output_names=["out_scan", "mask", "confounds"],
+ function=set_iterables,
+ ),
+ name=f"set_iterables_{pipe_x}",
+ )
+ iterables.inputs.mask_paths = func_paths[mask_paths_key]
+ iterables.inputs.ts_paths = func_paths[ts_paths_key]
+ self.wf.connect(ingress, "outputspec.scan", iterables, "scan")
+
+ for key in func_paths:
+ if key in (mask_paths_key, ts_paths_key):
+ ingress_func = create_general_datasource(f"ingress_func_data_{key}")
+ ingress_func.inputs.inputnode.set(
+ unique_id=self.unique_id,
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined]
+ )
+ self.wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan")
+ if key == mask_paths_key:
+ self.wf.connect(iterables, "mask", ingress_func, "inputnode.data")
+ self.set_data(
+ key,
+ ingress_func,
+ "inputnode.data",
+ {},
+ "",
+ f"outdir_{key}_ingress",
+ )
+ elif key == ts_paths_key:
+ self.wf.connect(
+ iterables, "confounds", ingress_func, "inputnode.data"
+ )
+ self.set_data(
+ key,
+ ingress_func,
+ "inputnode.data",
+ {},
+ "",
+ f"outdir_{key}_ingress",
+ )
+
+ def json_outdir_ingress(
+ self, filepath: Path | str, data_label: str, json: dict
+ ) -> tuple[dict, tuple[str, str], str, str]:
+ """Ingress sidecars from a BIDS derivatives directory."""
+ desc_val = None
+ for tag in data_label.split("_"):
+ if "desc-" in tag:
+ desc_val = tag
+ break
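+ # Derive the expected JSON sidecar path from the data file path.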
+ jsonpath = str(filepath)
+ for ext in EXTS:
+ jsonpath = jsonpath.replace(ext, "")
+ jsonpath = f"{jsonpath}.json"
+
+ if not os.path.exists(jsonpath):
+ WFLOGGER.info(
+ "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n",
+ filepath,
+ jsonpath,
+ )
+ json_info = {
+ "Description": "This data was generated elsewhere and "
+ "supplied by the user into this C-PAC run's "
+ "output directory. This JSON file was "
+ "automatically generated by C-PAC because a "
+ "JSON file was not supplied with the data."
+ }
+ json_info = {**json_info, **json}
+ write_output_json(json_info, jsonpath)
+ else:
+ json_info = read_json(jsonpath)
+ json_info = {**json_info, **json}
+ if "CpacProvenance" in json_info:
+ if desc_val:
+ # It's a C-PAC output, so check for pipe_idx/strategy integer
+ # suffixes in the desc- entries.
+ only_desc = str(desc_val)
+
+ if only_desc[-1].isdigit():
+ for _strat_idx in range(0, 3):
+ # stop at 3 digits; more than 999 strategies is not supported
+ if only_desc[-1].isdigit():
+ only_desc = only_desc[:-1]
+
+ if only_desc[-1] == "-":
+ only_desc = only_desc.rstrip("-")
+ else:
+ msg = (
+ "\n[!] Something went wrong with either "
+ "reading in the output directory or when "
+ "it was written out previously.\n\nGive "
+ "this to your friendly local C-PAC "
+ f"developer:\n\n{data_label!s}\n"
+ )
+ raise IOError(msg)
+
+ # remove the integer at the end of the desc-* variant, we will
+ # get the unique pipe_idx from the CpacProvenance below
+ data_label = data_label.replace(desc_val, only_desc)
+
+ # preserve cpac provenance/pipe_idx
+ pipe_idx = self.generate_prov_string(json_info["CpacProvenance"])
+ node_name = ""
+ else:
+ json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] # type: ignore [assignment]
+ if "Description" not in json_info:
+ json_info["Description"] = (
+ "This data was generated elsewhere and "
+ "supplied by the user into this C-PAC run's "
+ "output directory. This JSON file was "
+ "automatically generated by C-PAC because a "
+ "JSON file was not supplied with the data."
+ )
+ pipe_idx = self.generate_prov_string(json_info["CpacProvenance"])
+ node_name = f"{data_label}_ingress"
+
+ return json_info, pipe_idx, node_name, data_label
+
+ def ingress_raw_anat_data(self) -> None:
+ """Ingress raw anatomical data."""
+ if not self.data_paths.anat:
+ WFLOGGER.warning("No anatomical data present.")
+ return
+
+ anat_flow = create_anat_datasource(f"anat_T1w_gather_{self.unique_id}")
+
+ anat = {}
+ if "T1w" in self.data_paths.anat:
+ anat["T1"] = self.data_paths.anat["T1w"]
+
+ if "T1" in anat:
+ anat_flow.inputs.inputnode.set(
+ subject=self.part_id,
+ anat=anat["T1"],
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined]
+ img_type="anat",
+ )
+ self.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress")
+
+ if "T2w" in self.data_paths.anat:
+ anat_flow_T2 = create_anat_datasource(
+ f"anat_T2w_gather_{self.part_id}_{self.ses_id}"
+ )
+ anat_flow_T2.inputs.inputnode.set(
+ subject=self.part_id,
+ anat=self.data_paths.anat["T2w"],
+ creds_path=self.creds_path,
+ dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined]
+ img_type="anat",
+ )
+ self.set_data(
+ "T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress"
+ )
+
+ if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined]
+ self.ingress_freesurfer()
+
+ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # noqa: PLR0912,PLR0915
+ """Connect a :py:class:`~CPAC.pipeline.engine.nodeblock.NodeBlock` via the `ResourcePool`."""
+ debug = bool(self.cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined]
+ all_opts: list[str] = []
+
+ sidecar_additions = {
+ "CpacConfigHash": hashlib.sha1(
+ json.dumps(self.cfg.dict(), sort_keys=True).encode("utf-8")
+ ).hexdigest(),
+ "CpacConfig": self.cfg.dict(),
+ }
+
+ if self.cfg["pipeline_setup"]["output_directory"].get("user_defined"):
+ sidecar_additions["UserDefined"] = self.cfg["pipeline_setup"][
+ "output_directory"
+ ]["user_defined"]
+
+ for name, block_dct in block.node_blocks.items():
+ # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking.
+ switch = _check_null(block_dct["switch"])
+ config = _check_null(block_dct["config"])
+ option_key = _check_null(block_dct["option_key"])
+ option_val = _check_null(block_dct["option_val"])
+ inputs: NODEBLOCK_INPUTS = _check_null(block_dct["inputs"])
+ outputs: NODEBLOCK_OUTPUTS = _check_null(block_dct["outputs"])
+
+ block_function: NodeBlockFunction = block_dct["block_function"]
+
+ opts = []
+ if option_key and option_val:
+ if not isinstance(option_key, list):
+ option_key = [option_key]
+ if not isinstance(option_val, list):
+ option_val = [option_val]
+ if config:
+ key_list = config + option_key
+ else:
+ key_list = option_key
+ if "USER-DEFINED" in option_val:
+ # load custom config data into each 'opt'
+ opts = self.cfg[key_list]
+ else:
+ for option in option_val:
+ try:
+ if option in self.cfg[key_list]:
+ # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list
+ opts.append(option)
+ except AttributeError as err:
+ msg = f"{err}\nNode Block: {name}"
+ raise Exception(msg)
+
+ if opts is None:
+ opts = [opts]
+
+ elif option_key and not option_val:
+ # enables multiple config forking entries
+ if not isinstance(option_key[0], list):
+ msg = (
+ f"[!] The option_key field ({option_key}) "
+ f"for {name} exists but there is no "
+ "option_val.\n\nIf you are trying to "
+ "populate multiple option keys, the "
+ "option_val field must contain a list of "
+ "a list.\n"
+ )
+ raise ValueError(msg)
+ for option_config in option_key:
+ # option_config is a list of pipe config levels down to the option
+ if config:
+ key_list = config + option_config
+ else:
+ key_list = option_config
+ option_val = option_config[-1]
+ if option_val in self.cfg[key_list[:-1]]:
+ opts.append(option_val)
+ else:
+ # If there are multiple option_vals (in a list) in the docstring,
+ # they are iterated below in 'for opt in opts'; in that case the
+ # node block code itself has to delineate between the options.
+ opts = [None]
+ # This also means that multiple option_vals in docstring node blocks
+ # can only be entered once in the entire node-block sequence, not in
+ # a list of multiples.
+ if not opts:
+ # for node blocks where the options are split into different
+ # block functions - opts will be empty for non-selected
+ # options, and would waste the get_strats effort below
+ continue
+ all_opts += opts
+
+ if not switch:
+ switch = [True]
+ else:
+ if config:
+ try:
+ key_list = config + switch
+ except TypeError as te:
+ msg = (
+ "\n\n[!] Developer info: Docstring error "
+ f"for {name}, make sure the 'config' or "
+ "'switch' fields are lists.\n\n"
+ )
+ raise TypeError(msg) from te
+ switch = self.cfg[key_list]
+ elif isinstance(switch[0], list):
+ # we have multiple switches, which is designed to only work if
+ # config is set to "None"
+ switch_list = []
+ for key_list in switch:
+ val = self.cfg[key_list]
+ if isinstance(val, list):
+ # fork switches
+ if True in val:
+ switch_list.append(True)
+ if False in val:
+ switch_list.append(False)
+ else:
+ switch_list.append(val)
+ if False in switch_list:
+ switch = [False]
+ else:
+ switch = [True]
+ else:
+ # if config is set to "None"
+ key_list = switch
+ switch = self.cfg[key_list]
+ if not isinstance(switch, list):
+ switch = [switch]
+ if True in switch:
+ for (
+ pipe_idx,
+ strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.}
+ ) in self.get_strats(inputs, debug).items():
+ # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} }
+ fork = False in switch
+ for opt in opts:
+ # Each strat_pool is a single-level ResourcePool -- no pipe_idx
+ # strategy keys. 'data' and 'json' are available from strat_pool
+ # via member functions, and strat_pool carries the JSON
+ # information of all of the inputs, so when we set_data below on
+ # the top-level main rpool (not the strat_pool), we can generate
+ # new merged JSON information for each output -- particularly our
+ # custom 'CpacProvenance' field.
+ node_name = name
+ pipe_x = self._get_pipe_number(pipe_idx)
+
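+ # Copy resources named by the block's input interface to the aliases the block function expects.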
+ replaced_inputs = []
+ for interface in block.input_interface:
+ if isinstance(interface[1], list):
+ for input_name in interface[1]:
+ if strat_pool.check_rpool(input_name):
+ break
+ else:
+ input_name = interface[1]
+ strat_pool.copy_resource(input_name, interface[0])
+ replaced_inputs.append(interface[0])
+ try:
+ wf, outs = block_function(
+ wf, self.cfg, strat_pool, pipe_x, opt
+ )
+ except IOError as e: # duplicate node
+ WFLOGGER.warning(e)
+ continue
+
+ if not outs:
+ if block_function.__name__ == "freesurfer_postproc":
+ WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA)
+ LOGTAIL["warnings"].append(
+ WARNING_FREESURFER_OFF_WITH_DATA
+ )
+ continue
+
+ if opt and len(option_val) > 1:
+ node_name = f"{node_name}_{opt}"
+ elif opt and "USER-DEFINED" in option_val:
+ node_name = f'{node_name}_{opt["Name"]}'
+
+ if debug:
+ verbose_logger = getLogger("CPAC.engine")
+ verbose_logger.debug("\n=======================")
+ verbose_logger.debug("Node name: %s", node_name)
+ prov_dct = self.get_resource_strats_from_prov(
+ ast.literal_eval(str(pipe_idx))
+ )
+ for key, val in prov_dct.items():
+ verbose_logger.debug("-------------------")
+ verbose_logger.debug("Input - %s:", key)
+ sub_prov_dct = self.get_resource_strats_from_prov(val)
+ for sub_key, sub_val in sub_prov_dct.items():
+ sub_sub_dct = self.get_resource_strats_from_prov(
+ sub_val
+ )
+ verbose_logger.debug(" sub-input - %s:", sub_key)
+ verbose_logger.debug(" prov = %s", sub_val)
+ verbose_logger.debug(
+ " sub_sub_inputs = %s", sub_sub_dct.keys()
+ )
+
+ for label, connection in outs.items():
+ block.check_output(outputs, label, name)
+ new_json_info = strat_pool.json
+
+ # transfer over data-specific json info
+ # for example, if the input data json is _bold and the output is also _bold
+ data_type = label.split("_")[-1]
+ if data_type in new_json_info["subjson"]:
+ if (
+ "SkullStripped"
+ in new_json_info["subjson"][data_type]
+ ):
+ new_json_info["SkullStripped"] = new_json_info[
+ "subjson"
+ ][data_type]["SkullStripped"]
+
+ # determine sources for the outputs, i.e. all input data into the node block
+ new_json_info["Sources"] = [
+ x
+ for x in strat_pool.rpool
+ if x != "json" and x not in replaced_inputs
+ ]
+
+ if isinstance(outputs, dict):
+ new_json_info.update(outputs[label])
+ if "Description" not in outputs[label]:
+ # don't propagate old Description
+ try:
+ del new_json_info["Description"]
+ except KeyError:
+ pass
+ if "Template" in outputs[label]:
+ template_key = outputs[label]["Template"]
+ if template_key in new_json_info["Sources"]:
+ # only if the pipeline config template key is entered as the 'Template' field
+ # otherwise, skip this and take in the literal 'Template' string
+ try:
+ new_json_info["Template"] = new_json_info[
+ "subjson"
+ ][template_key]["Description"]
+ except KeyError:
+ pass
+ try:
+ new_json_info["Resolution"] = new_json_info[
+ "subjson"
+ ][template_key]["Resolution"]
+ except KeyError:
+ pass
+ else:
+ # don't propagate old Description
+ try:
+ del new_json_info["Description"]
+ except KeyError:
+ pass
+
+ if "Description" in new_json_info:
+ new_json_info["Description"] = " ".join(
+ new_json_info["Description"].split()
+ )
+
+ for sidecar_key, sidecar_value in sidecar_additions.items():
+ if sidecar_key not in new_json_info:
+ new_json_info[sidecar_key] = sidecar_value
+
+ try:
+ del new_json_info["subjson"]
+ except KeyError:
+ pass
+
+ if fork or len(opts) > 1 or len(all_opts) > 1:
+ if "CpacVariant" not in new_json_info:
+ new_json_info["CpacVariant"] = {}
+ raw_label = self.get_raw_label(label)
+ if raw_label not in new_json_info["CpacVariant"]:
+ new_json_info["CpacVariant"][raw_label] = []
+ new_json_info["CpacVariant"][raw_label].append(
+ node_name
+ )
+
+ self.set_data(
+ label,
+ connection[0],
+ connection[1],
+ new_json_info,
+ pipe_idx,
+ node_name,
+ fork,
+ )
+
+ wf, post_labels = self.post_process(
+ wf,
+ label,
+ connection,
+ new_json_info,
+ pipe_idx,
+ pipe_x,
+ outs,
+ )
+
+ if self.func_reg:
+ for postlabel in post_labels:
+ connection = ResourceData( # noqa: PLW2901
+ postlabel[1], postlabel[2]
+ )
+ wf = self.derivative_xfm(
+ wf,
+ postlabel[0],
+ connection,
+ new_json_info,
+ pipe_idx,
+ pipe_x,
+ )
+ return wf
+
+ def connect_pipeline(
+ self,
+ wf: pe.Workflow,
+ cfg: Configuration,
+ pipeline_blocks: PIPELINE_BLOCKS,
+ ) -> pe.Workflow:
+ """Connect the pipeline blocks to the workflow."""
+ from CPAC.pipeline.engine.nodeblock import NodeBlockFunction, PIPELINE_BLOCKS
+
+ WFLOGGER.info(
+ "Connecting pipeline blocks:\n%s",
+ NodeBlock.list_blocks(pipeline_blocks, indent=1),
+ )
+ previous_nb: Optional[NodeBlockFunction | PIPELINE_BLOCKS] = None
+ for block in pipeline_blocks:
+ try:
+ wf = self.connect_block(
+ wf,
+ NodeBlock(
+ block, debug=cfg["pipeline_setup", "Debugging", "verbose"]
+ ),
+ )
+ except LookupError as e:
+ if getattr(block, "name", "") == "freesurfer_postproc":
+ WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA)
+ LOGTAIL["warnings"].append(WARNING_FREESURFER_OFF_WITH_DATA)
+ continue
+ previous_nb_str = (
+ (f"after node block '{previous_nb.name}':")
+ if isinstance(previous_nb, NodeBlockFunction)
+ else "at beginning:"
+ )
+ # Alert user to block that raises error
+ if isinstance(block, list):
+ node_block_names = str([NodeBlock(b).name for b in block])
+ e.args = (
+ f"When trying to connect one of the node blocks "
+ f"{node_block_names} "
+ f"to workflow '{wf}' {previous_nb_str} {e.args[0]}",
+ )
+ else:
+ node_block_names = NodeBlock(block).name
+ e.args = (
+ f"When trying to connect node block "
+ f"'{node_block_names}' "
+ f"to workflow '{wf}' {previous_nb_str} {e.args[0]}",
+ )
+ if cfg.pipeline_setup["Debugging"]["verbose"]: # type: ignore [attr-defined]
+ verbose_logger = getLogger("CPAC.engine")
+ verbose_logger.debug(e.args[0])
+ verbose_logger.debug(self)
+ raise
+ previous_nb = block
+
+ return wf
+
+ def derivative_xfm(
+ self,
+ wf: pe.Workflow,
+ label: str,
+ connection: ResourceData | tuple[pe.Node | pe.Workflow, str],
+ json_info: dict,
+ pipe_idx: str | tuple,
+ pipe_x: int,
+ ) -> pe.Workflow:
+ """Find the appropriate bold-to-template transform for given `pipe_idx`."""
+ if label in self.xfm:
+ json_info = dict(json_info)
+
+ # get the bold-to-template transform from the current strat_pool info
+ xfm_idx: Optional[str | tuple] = None
+ xfm_label = "from-bold_to-template_mode-image_xfm"
+ for entry in json_info["CpacProvenance"]:
+ if isinstance(entry, list):
+ if entry[-1].split(":")[0] == xfm_label:
+ xfm_prov = entry
+ xfm_idx = self.generate_prov_string(xfm_prov)[1]
+ break
+
+ # but if the resource doesn't have the bold-to-template transform
+ # in its provenance/strategy, find the appropriate one for this
+ # current pipe_idx/strat
+ xfm_info: list[tuple[str | tuple, list]]
+ if not xfm_idx:
+ xfm_info = []
+ for pipe_idx, entry in self.get(xfm_label).items():
+ xfm_info.append((pipe_idx, entry.cpac_provenance))
+ else:
+ xfm_info = [(xfm_idx, xfm_prov)]
+
+ for num, xfm_entry in enumerate(xfm_info):
+ xfm_idx, xfm_prov = xfm_entry
+ reg_tool = check_prov_for_regtool(xfm_prov)
+
+ xfm = transform_derivative(
+ f"{label}_xfm_{pipe_x}_{num}",
+ label,
+ reg_tool,
+ self.num_cpus,
+ self.num_ants_cores,
+ ants_interp=self.ants_interp,
+ fsl_interp=self.fsl_interp,
+ opt=None,
+ )
+ wf.connect(connection[0], connection[1], xfm, "inputspec.in_file")
+
+ node, out = self.get_data("T1w-brain-template-deriv", quick_single=True)
+ wf.connect(node, out, xfm, "inputspec.reference")
+
+ node, out = self.get_data(
+ "from-bold_to-template_mode-image_xfm", pipe_idx=xfm_idx
+ )
+ wf.connect(node, out, xfm, "inputspec.transform")
+
+ label = f"space-template_{label}"
+ json_info["Template"] = self.get_json_info(
+ "T1w-brain-template-deriv", "Description"
+ )
+ new_prov = json_info["CpacProvenance"] + xfm_prov
+ json_info["CpacProvenance"] = new_prov
+ new_pipe_idx = self.generate_prov_string(new_prov)
+ self.set_data(
+ label,
+ xfm,
+ "outputspec.out_file",
+ json_info,
+ new_pipe_idx,
+ f"{label}_xfm_{num}",
+ fork=True,
+ )
+
+ return wf
+
+ def post_process(
+ self,
+ wf: pe.Workflow,
+ label: str,
+ connection: ResourceData | tuple[pe.Node | pe.Workflow, str],
+ json_info: dict,
+ pipe_idx: str | tuple,
+ pipe_x: int,
+ outs: dict[str, ResourceData],
+ ) -> tuple[pe.Workflow, list[tuple[str, pe.Node | pe.Workflow, str]]]:
+ """Connect smoothing and z-scoring, if configured."""
+ input_type = "func_derivative"
+
+ post_labels = [(label, connection[0], connection[1])]
+
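+ # Choose the brain mask appropriate to the output's space for smoothing and z-scoring.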
+ if re.match(r"(.*_)?[ed]c[bw]$", label) or re.match(r"(.*_)?lfcd[bw]$", label):
+ # suffix: [eigenvector or degree] centrality [binarized or weighted]
+ # or lfcd [binarized or weighted]
+ mask = "template-specification-file"
+ elif "space-template" in label:
+ if "space-template_res-derivative_desc-bold_mask" in self.keys():
+ mask = "space-template_res-derivative_desc-bold_mask"
+ else:
+ mask = "space-template_desc-bold_mask"
+ else:
+ mask = "space-bold_desc-brain_mask"
+
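+ # Find the strategy (pipe_idx) of the chosen mask within this output's provenance, if present.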
+ mask_idx = None
+ for entry in json_info["CpacProvenance"]:
+ if isinstance(entry, list):
+ if entry[-1].split(":")[0] == mask:
+ mask_prov = entry
+ mask_idx = self.generate_prov_string(mask_prov)[1]
+ break
+
+ if self.smoothing_bool:
+ if label in Outputs.to_smooth:
+ for smooth_opt in self.smooth_opts:
+ sm = spatial_smoothing(
+ f"{label}_smooth_{smooth_opt}_{pipe_x}",
+ self.fwhm,
+ input_type,
+ smooth_opt,
+ )
+ wf.connect(connection[0], connection[1], sm, "inputspec.in_file")
+ node, out = self.get_data(
+ mask, pipe_idx=mask_idx, quick_single=mask_idx is None
+ )
+ wf.connect(node, out, sm, "inputspec.mask")
+
+ if "desc-" not in label:
+ if "space-" in label:
+ for tag in label.split("_"):
+ if "space-" in tag:
+ smlabel = label.replace(tag, f"{tag}_desc-sm")
+ break
+ else:
+ smlabel = f"desc-sm_{label}"
+ else:
+ for tag in label.split("_"):
+ if "desc-" in tag:
+ newtag = f"{tag}-sm"
+ smlabel = label.replace(tag, newtag)
+ break
+
+ post_labels.append((smlabel, sm, "outputspec.out_file"))
+
+ self.set_data(
+ smlabel,
+ sm,
+ "outputspec.out_file",
+ json_info,
+ pipe_idx,
+ f"spatial_smoothing_{smooth_opt}",
+ fork=True,
+ )
+ self.set_data(
+ "fwhm",
+ sm,
+ "outputspec.fwhm",
+ json_info,
+ pipe_idx,
+ f"spatial_smoothing_{smooth_opt}",
+ fork=True,
+ )
+
+ if self.zscoring_bool:
+ for label_con_tpl in post_labels:
+ label = label_con_tpl[0]
+ connection = (label_con_tpl[1], label_con_tpl[2])
+ if label in Outputs.to_zstd:
+ zstd = z_score_standardize(f"{label}_zstd_{pipe_x}", input_type)
+
+ wf.connect(connection[0], connection[1], zstd, "inputspec.in_file")
+
+ node, out = self.get_data(mask, pipe_idx=mask_idx)
+ wf.connect(node, out, zstd, "inputspec.mask")
+
+ if "desc-" not in label:
+ if "space-template" in label:
+ new_label = label.replace(
+ "space-template", "space-template_desc-zstd"
+ )
+ else:
+ new_label = f"desc-zstd_{label}"
+ else:
+ for tag in label.split("_"):
+ if "desc-" in tag:
+ newtag = f"{tag}-zstd"
+ new_label = label.replace(tag, newtag)
+ break
+
+ post_labels.append((new_label, zstd, "outputspec.out_file"))
+
+ self.set_data(
+ new_label,
+ zstd,
+ "outputspec.out_file",
+ json_info,
+ pipe_idx,
+ "zscore_standardize",
+ fork=True,
+ )
+
+ elif label in Outputs.to_fisherz:
+ zstd = fisher_z_score_standardize(
+ f"{label}_zstd_{pipe_x}", label, input_type
+ )
+
+ wf.connect(
+ connection[0], connection[1], zstd, "inputspec.correlation_file"
+ )
+
+ # if the output is 'space-template_desc-MeanSCA_correlations', we want 'desc-MeanSCA_timeseries'
+ oned = label.replace("correlations", "timeseries")
+
+ node, out = outs[oned]
+ wf.connect(node, out, zstd, "inputspec.timeseries_oned")
+
+ post_labels.append((new_label, zstd, "outputspec.out_file"))
+
+ self.set_data(
+ new_label,
+ zstd,
+ "outputspec.out_file",
+ json_info,
+ pipe_idx,
+ "fisher_zscore_standardize",
+ fork=True,
+ )
+
+ return wf, post_labels
+
+ @staticmethod
+ def get_resource_strats_from_prov(prov: list | str) -> dict[str, list | str]:
+ """Return all entries that led to this provenance.
+
+ If you provide the provenance of a `ResourcePool` output, this will
+ return a dictionary of all the preceding `ResourcePool` entries that
+ led to that one specific output::
+ {rpool entry}: {that entry's provenance}
+ {rpool entry}: {that entry's provenance}
+ """
+ strat_resource: dict[str, list | str] = {}
+ if isinstance(prov, str):
+ resource = prov.split(":")[0]
+ strat_resource[resource] = prov
+ else:
+ for entry in prov:
+ if isinstance(entry, list):
+ resource = entry[-1].split(":")[0]
+ strat_resource[resource] = entry
+ elif isinstance(entry, str):
+ resource = entry.split(":")[0]
+ strat_resource[resource] = entry
+ return strat_resource
+
+ def _config_lookup(
+ self, keylist: str | list[str], fallback_type: type = NoneType
+ ) -> Any:
+ """Lookup a :py:class:`~CPAC.utils.configuration.Configuration` key, return ``None`` if not found."""
+ try:
+ return self.cfg[keylist]
+ except (AttributeError, KeyError):
+ return fallback_type()
+
+ def _get_pipe_number(self, pipe_idx: str | tuple) -> int:
+ """Return the index of a strategy in `self.pipe_list`."""
+ return self.pipe_list.index(pipe_idx)
+
+ def _get_unlabelled(self, resource: str) -> set[str]:
+ """Get unlabelled :py:class:`Resource` s.
+
+ These :py:class:`Resource` s need integer suffixes to differentiate.
+ """
+ from CPAC.func_preproc.func_motion import motion_estimate_filter
+
+ all_jsons = [
+ self.rpool[resource][pipe_idx]._json for pipe_idx in self.rpool[resource]
+ ]
+ unlabelled = {
+ key
+ for json_info in all_jsons
+ for key in json_info.get("CpacVariant", {}).keys()
+ if key not in (*motion_estimate_filter.outputs, "regressors")
+ }
+ if "bold" in unlabelled:
+ all_bolds = list(
+ chain.from_iterable(
+ json_info["CpacVariant"]["bold"]
+ for json_info in all_jsons
+ if "CpacVariant" in json_info and "bold" in json_info["CpacVariant"]
+ )
+ )
+ if all(
+ re.match(r"apply_(phasediff|blip)_to_timeseries_separately_.*", _bold)
+ for _bold in all_bolds
+ ):
+ # this fork point should only result in 0 or 1 forks
+ unlabelled.remove("bold")
+ del all_bolds
+ all_forks = {
+ key: set(
+ chain.from_iterable(
+ json_info["CpacVariant"][key]
+ for json_info in all_jsons
+ if "CpacVariant" in json_info and key in json_info["CpacVariant"]
+ )
+ )
+ for key in unlabelled
+ }
+ del all_jsons
+ for key, forks in all_forks.items():
+ if len(forks) < 2: # noqa: PLR2004
+ # no int suffix needed if only one fork
+ unlabelled.remove(key)
+ del all_forks
+ return unlabelled
+
+
+class StratPool(_Pool):
+ """A pool of :py:class:`ResourcePool` s keyed by strategy."""
+
+ def __init__(
+ self,
+ cfg: Configuration,
+ *,
+ rpool: Optional[dict] = None,
+ name: str | list[str] = "",
+ ) -> None:
+ """Initialize a `StratPool`."""
+ super().__init__()
+ if not rpool:
+ self.rpool = STRAT_DICT({})
+ else:
+ self.rpool = STRAT_DICT(rpool)
+ self._json: dict[str, dict] = {"subjson": {}}
+ self.cfg = cfg
+ if not isinstance(name, list):
+ name = [name]
+ self.name: list[str] = name
+ self._regressor_dct: dict = {}
+
+ def append_name(self, name: str) -> None:
+ """Append a name to the `StratPool`."""
+ self.name.append(name)
+
+ @overload
+ def get(
+ self,
+ resource: list[str] | str,
+ pipe_idx: Optional[PIPE_IDX] = None,
+ report_fetched: Literal[False] = False,
+ *,
+ optional: Literal[True],
+ ) -> Optional[Resource]: ...
+ @overload
+ def get(
+ self,
+ resource: list[str] | str,
+ pipe_idx: Optional[PIPE_IDX],
+ report_fetched: Literal[True],
+ optional: Literal[True],
+ ) -> tuple[Optional[Resource], Optional[str]]: ...
+ @overload
+ def get(
+ self,
+ resource: list[str] | str,
+ pipe_idx: Optional[PIPE_IDX] = None,
+ *,
+ report_fetched: Literal[True],
+ optional: Literal[False],
+ ) -> tuple[Resource, str]: ...
+ @overload
+ def get(
+ self,
+ resource: list[str] | str,
+ pipe_idx: Optional[PIPE_IDX] = None,
+ report_fetched: bool = False,
+ *,
+ optional: Literal[True],
+ ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ...
+ @overload
+ def get(
+ self,
+ resource: list[str] | str,
+ pipe_idx: Optional[PIPE_IDX] = None,
+ report_fetched: Literal[False] = False,
+ optional: Literal[False] = False,
+ ) -> Resource: ...
+ @overload
+ def get(
+ self,
+ resource: list[str] | str,
+ pipe_idx: Optional[PIPE_IDX] = None,
+ *,
+ report_fetched: Literal[True],
+ optional: Literal[False] = False,
+ ) -> tuple[Resource, str]: ...
+ @overload
+ def get(
+ self,
+ resource: list[str] | str,
+ pipe_idx: Optional[PIPE_IDX] = None,
+ report_fetched: bool = False,
+ optional: bool = False,
+ ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ...
+ def get(
+ self,
+ resource: list[str] | str,
+ pipe_idx: Optional[PIPE_IDX] = None,
+ report_fetched: bool = False,
+ optional: bool = False,
+ ):
+ """Return a :py:class:`Resource` ."""
+ return super().get(resource, pipe_idx, report_fetched, optional)
+
+ @overload
+ def get_data(
+ self, resource: list[str] | str, report_fetched: Literal[True]
+ ) -> tuple[ResourceData, str]: ...
+ @overload
+ def get_data(
+ self, resource: list[str] | str, report_fetched: Literal[False] = False
+ ) -> ResourceData: ...
+ def get_data(self, resource, report_fetched=False):
+ """Get :py:class:`ResourceData` from a `StratPool`."""
+ _resource = self.get(resource, report_fetched=report_fetched)
+ if report_fetched:
+ assert isinstance(_resource, tuple)
+ connect, fetched = _resource
+ assert isinstance(connect, Resource) and isinstance(fetched, str)
+ return connect.data, fetched
+ assert isinstance(_resource, Resource)
+ return _resource.data
+
+ def get_json(self, resource: str) -> dict:
+ """Get JSON metadata from a :py:class:`Resource` in a `StratPool`."""
+ return self.get(resource).json
+
+ json = property(
+ fget=Resource.get_json,
+ fset=Resource.set_json,
+ doc="""Return a deep copy of full-`StratPool`-strategy-specific JSON.""",
+ )
+
+ def get_cpac_provenance(self, resource: list[str] | str) -> list:
+ """Get "CpacProvenance" for a given :py:class:`Resource` ."""
+ # NOTE: strat_resource has to be entered properly by the developer
+ # it has to either be rpool[resource][strat] or strat_pool[resource]
+ if isinstance(resource, list):
+ for _resource in resource:
+ try:
+ return self.get_cpac_provenance(_resource)
+ except KeyError:
+ continue
+ return self.get(resource).cpac_provenance
+
+ def copy_resource(self, resource: str, new_name: str):
+ """Copy a :py:class:`Resource` within a `StratPool`."""
+ try:
+ self.rpool[new_name] = self.rpool[resource]
+ except KeyError:
+ msg = f"[!] {resource} not in the resource pool."
+ raise Exception(msg)
+
+ def filter_name(self, cfg: Configuration) -> str:
+ """
+ Return the name of the filter for this strategy.
+
+ In a `StratPool` with filtered movement parameters.
+ """
+ motion_filters = cfg[
+ "functional_preproc",
+ "motion_estimates_and_correction",
+ "motion_estimate_filter",
+ "filters",
+ ]
+ if len(motion_filters) == 1 and cfg.switch_is_on(
+ [
+ "functional_preproc",
+ "motion_estimates_and_correction",
+ "motion_estimate_filter",
+ "run",
+ ],
+ exclusive=True,
+ ):
+ return motion_filters[0]["Name"]
+ try:
+ key = "motion"
+ sidecar = self.get_json("desc-movementParameters_motion")
+ except KeyError:
+ sidecar = None
+ if sidecar is not None and "CpacVariant" in sidecar:
+ if sidecar["CpacVariant"][key]:
+ return sidecar["CpacVariant"][key][0][::-1].split("_", 1)[0][::-1]
+ return "none"
+
+ def preserve_json_info(self, resource: str, strat_resource: Resource) -> None:
+ """Preserve JSON info when updating a `StratPool`."""
+ data_type = resource.split("_")[-1]
+ if data_type not in self._json["subjson"]:
+ self._json["subjson"][data_type] = {}
+ self._json["subjson"][data_type].update(strat_resource.json)
+
+ @property
+ def regressor_dct(self) -> dict:
+ """Return the regressor dictionary for the current strategy if one exists.
+
+ Raises
+ ------
+ KeyError
+ If regressor dictionary does not exist in current strategy.
+ """
+ # pylint: disable=attribute-defined-outside-init
+ if hasattr(self, "_regressor_dct") and self._regressor_dct: # memoized
+ # pylint: disable=access-member-before-definition
+ return self._regressor_dct
+ key_error = KeyError(
+ "[!] No regressors in resource pool. \n\n"
+ "Try turning on create_regressors or "
+ "ingress_regressors."
+ )
+ _nr = self.cfg["nuisance_corrections", "2-nuisance_regression"]
+ if not hasattr(self, "timeseries"):
+ if _nr["Regressors"]:
+ self.regressors = {reg["Name"]: reg for reg in _nr["Regressors"]}
+ else:
+ self.regressors = []
+ if self.check_rpool("parsed_regressors"): # ingressed regressor
+ # name regressor workflow without regressor_prov
+ strat_name = _nr["ingress_regressors"]["Regressors"]["Name"]
+ if strat_name in self.regressors:
+ self._regressor_dct = self.regressors[strat_name]
+ return self._regressor_dct
+ self._regressor_dct = _nr["ingress_regressors"]["Regressors"]
+ return self._regressor_dct
+ prov = self.get_cpac_provenance("desc-confounds_timeseries")
+ strat_name_components = prov[-1].split("_")
+ for _ in list(range(prov[-1].count("_"))):
+ reg_name = "_".join(strat_name_components[-_:])
+ if isinstance(self.regressors, dict) and reg_name in self.regressors:
+ self._regressor_dct = self.regressors[reg_name]
+ return self._regressor_dct
+ raise key_error
+
+ @property
+ def filtered_movement(self) -> bool:
+ """Check if the movement parameters have been filtered in this `StratPool`."""
+ try:
+ return "motion_estimate_filter" in str(
+ self.get_cpac_provenance("desc-movementParameters_motion")
+ )
+ except KeyError:
+ # not a strat_pool or no movement parameters in strat_pool
+ return False
+
+
+def _check_null(val: Any) -> Any:
+ """Return ``None`` if `val` == "none" (case-insensitive)."""
+ if isinstance(val, str):
+ val = None if val.lower() == "none" else val
+ return val
diff --git a/CPAC/pipeline/nodeblock.py b/CPAC/pipeline/nodeblock.py
deleted file mode 100644
index 53b9db1330..0000000000
--- a/CPAC/pipeline/nodeblock.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# Copyright (C) 2023-2024 C-PAC Developers
-
-# This file is part of C-PAC.
-
-# C-PAC is free software: you can redistribute it and/or modify it under
-# the terms of the GNU Lesser General Public License as published by the
-# Free Software Foundation, either version 3 of the License, or (at your
-# option) any later version.
-
-# C-PAC is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public
-# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
-"""Class and decorator for NodeBlock functions."""
-
-from typing import Any, Callable, Optional
-
-
-class NodeBlockFunction:
- """Store a reference to the nodeblock function and all of its meta-data."""
-
- def __init__(
- self,
- func: Callable,
- name: Optional[str] = None,
- config: Optional[list[str]] = None,
- switch: Optional[list[str] | list[list[str]]] = None,
- option_key: Optional[str | list[str]] = None,
- option_val: Optional[str | list[str]] = None,
- inputs: Optional[list[str | list | tuple]] = None,
- outputs: Optional[list[str] | dict[str, Any]] = None,
- ) -> None:
- self.func = func
- """Nodeblock function reference."""
- self.name: Optional[str] = name
- """Used in the graph and logging to identify the NodeBlock and its component nodes."""
- self.config: Optional[list[str]] = config
- """
- Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this
- function. If config is set to ``None``, then all other configuration-related entities must be specified from the
- root of the configuration.
- """
- self.switch: Optional[list[str] | list[list[str]]] = switch
- """
- Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings
- indicates multiple switches that must all be True to run, and is currently only an option if config is set to
- ``None``.
- """
- self.option_key: Optional[str | list[str]] = option_key
- """
- Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock.
- """
- self.option_val: Optional[str | list[str]] = option_val
- """Indicates values for which this NodeBlock should be active."""
- self.inputs: Optional[list[str | list | tuple]] = inputs
- """ResourcePool keys indicating resources needed for the NodeBlock's functionality."""
- self.outputs: Optional[list[str] | dict[str, Any]] = outputs
- """
- ResourcePool keys indicating resources generated or updated by the NodeBlock, optionally including metadata
- for the outputs' respective sidecars.
- """
-
- # Forward function attributes similar to functools.update_wrapper:
- # https://docs.python.org/3/library/functools.html#functools.update_wrapper
- self.__module__ = func.__module__
- self.__name__ = func.__name__
- self.__qualname__ = func.__qualname__
- self.__annotations__ = func.__annotations__
- self.__doc__ = "".join(
- [
- _.replace(" ", "")
- for _ in [func.__doc__, "", "", NodeBlockFunction.__call__.__doc__]
- if _ is not None
- ]
- ).rstrip()
-
- # all node block functions have this signature
- def __call__(self, wf, cfg, strat_pool, pipe_num, opt=None):
- """
-
- Parameters
- ----------
- wf : ~nipype.pipeline.engine.workflows.Workflow
-
- cfg : ~CPAC.utils.configuration.Configuration
-
- strat_pool
-
- pipe_num : int
-
- opt : str, optional
-
- Returns
- -------
- wf : ~nipype.pipeline.engine.workflows.Workflow
-
- out : dict
- """
- return self.func(wf, cfg, strat_pool, pipe_num, opt)
-
- def legacy_nodeblock_dict(self):
- """Return nodeblock metadata as a dictionary.
-
- Helper for compatibility reasons.
- """
- return {
- "name": self.name,
- "config": self.config,
- "switch": self.switch,
- "option_key": self.option_key,
- "option_val": self.option_val,
- "inputs": self.inputs,
- "outputs": self.outputs,
- }
-
- def __repr__(self) -> str:
- """Return reproducible string representation of a NodeBlockFunction."""
- return (
- f"NodeBlockFunction({self.func.__module__}."
- f'{self.func.__name__}, "{self.name}", '
- f"config={self.config}, switch={self.switch}, "
- f"option_key={self.option_key}, option_val="
- f"{self.option_val}, inputs={self.inputs}, "
- f"outputs={self.outputs})"
- )
-
- def __str__(self) -> str:
- """Return string representation of a NodeBlockFunction."""
- return f"NodeBlockFunction({self.name})"
-
-
-def nodeblock(
- name: Optional[str] = None,
- config: Optional[list[str]] = None,
- switch: Optional[list[str] | list[list[str]]] = None,
- option_key: Optional[str | list[str]] = None,
- option_val: Optional[str | list[str]] = None,
- inputs: Optional[list[str | list | tuple]] = None,
- outputs: Optional[list[str] | dict[str, Any]] = None,
-):
- """
- Define a node block.
-
- Connections to the pipeline configuration and to other node blocks.
-
- Parameters
- ----------
- name
- Used in the graph and logging to identify the NodeBlock and its component nodes.
- config
- Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this
- function. If config is set to ``None``, then all other configuration-related entities must be specified from the
- root of the configuration.
- switch
- Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings
- indicates multiple switches that must all be True to run, and is currently only an option if config is set to
- ``None``.
- option_key
- Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock.
- option_val
- Indicates values for which this NodeBlock should be active.
- inputs
- ResourcePool keys indicating files needed for the NodeBlock's functionality.
- outputs
- ResourcePool keys indicating files generated or updated by the NodeBlock, optionally including metadata
- for the outputs' respective sidecars.
- """
- return lambda func: NodeBlockFunction(
- func,
- name if name is not None else func.__name__,
- config,
- switch,
- option_key,
- option_val,
- inputs,
- outputs,
- )
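For context on the decorator this removed file defines (relocated to `CPAC.pipeline.engine.nodeblock` per the import updates later in this diff), here is a minimal sketch of how a node block is declared and what its shared call signature returns. The config keys, inputs, and outputs below are hypothetical, not taken from a real C-PAC node block.

```python
# Hypothetical node block: config keys, inputs, and outputs are illustrative only.
from CPAC.pipeline.engine.nodeblock import nodeblock


@nodeblock(
    name="example_smoothing",
    config=["post_processing", "spatial_smoothing"],
    switch=["run"],
    option_key="smoothing_method",
    option_val=["FSL", "AFNI"],
    inputs=["desc-preproc_bold"],
    outputs=["desc-smoothed_bold"],
)
def example_smoothing(wf, cfg, strat_pool, pipe_num, opt=None):
    """All node block functions share this signature and return (wf, outputs)."""
    outputs = {}  # in practice, maps ResourcePool keys to (node, out) tuples
    return wf, outputs
```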
diff --git a/CPAC/pipeline/schema.py b/CPAC/pipeline/schema.py
index 915cb47045..6dc11326d5 100644
--- a/CPAC/pipeline/schema.py
+++ b/CPAC/pipeline/schema.py
@@ -21,6 +21,7 @@
from itertools import chain, permutations
import re
from subprocess import CalledProcessError
+from typing import Any as TypeAny, Optional as TypeOptional
import numpy as np
from pathvalidate import sanitize_filename
@@ -63,18 +64,12 @@
Number = Any(float, int, All(str, Match(SCIENTIFIC_NOTATION_STR_REGEX)))
-def str_to_bool1_1(x): # pylint: disable=invalid-name
- """Convert strings to Booleans for YAML1.1 syntax.
+def str_to_bool1_1(x: TypeAny) -> bool: # pylint: disable=invalid-name
+ """Convert strings to Booleans for YAML1.1 syntax [1]_.
- Ref https://yaml.org/type/bool.html
-
- Parameters
+ References
----------
- x : any
-
- Returns
- -------
- bool
+ .. [1] 2005-01-18. Oren Ben-Kiki, Clark Evans & Brian Ingerson. `"Boolean Language-Independent Type for YAML™ Version 1.1" [Working Draft] <https://yaml.org/type/bool.html>`_. Copyright © 2001-2005 Oren Ben-Kiki, Clark Evans, Brian Ingerson.
"""
if isinstance(x, str):
try:
@@ -316,19 +311,9 @@ def str_to_bool1_1(x): # pylint: disable=invalid-name
)
-def name_motion_filter(mfilter, mfilters=None):
+def name_motion_filter(mfilter: dict, mfilters: TypeOptional[list] = None) -> str:
"""Given a motion filter, create a short string for the filename.
- Parameters
- ----------
- mfilter : dict
-
- mfliters : list or None
-
- Returns
- -------
- str
-
Examples
--------
>>> name_motion_filter({'filter_type': 'notch', 'filter_order': 2,
@@ -385,19 +370,8 @@ def name_motion_filter(mfilter, mfilters=None):
return name
-def permutation_message(key, options):
- """Give a human-readable error message for keys that accept permutation values.
-
- Parameters
- ----------
- key: str
-
- options: list or set
-
- Returns
- -------
- msg: str
- """
+def permutation_message(key: str, options: list | set) -> str:
+ """Give a human-readable error message for keys that accept permutation values."""
return f"""
\'{key}\' takes a dictionary with paths to region-of-interest (ROI)
@@ -412,7 +386,7 @@ def permutation_message(key, options):
"""
-def sanitize(filename):
+def sanitize(filename: str) -> str:
"""Sanitize a filename and replace whitespaces with underscores."""
return re.sub(r"\s+", "_", sanitize_filename(filename))
@@ -1253,20 +1227,12 @@ def sanitize(filename):
)
-def schema(config_dict):
+def schema(config_dict: dict) -> dict:
"""Validate a participant-analysis pipeline configuration.
Validate against the latest validation schema by first applying backwards-
compatibility patches, then applying Voluptuous validation, then handling complex
- configuration interaction checks before returning validated config_dict.
-
- Parameters
- ----------
- config_dict : dict
-
- Returns
- -------
- dict
+ configuration interaction checks before returning validated `config_dict`.
"""
from CPAC.utils.utils import _changes_1_8_0_to_1_8_1
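Since the rewritten docstring now points at the YAML 1.1 boolean type for the accepted spellings, a standalone sketch of that mapping may help; this is an independent illustration, not the exact logic inside `str_to_bool1_1`.

```python
# Independent sketch of YAML 1.1 boolean parsing (https://yaml.org/type/bool.html);
# not C-PAC's exact implementation.
def yaml1_1_bool(value: str | bool | int) -> bool:
    truthy = {"y", "yes", "true", "on"}
    falsy = {"n", "no", "false", "off"}
    if isinstance(value, str):
        lowered = value.lower()
        if lowered in truthy:
            return True
        if lowered in falsy:
            return False
    return bool(value)


assert yaml1_1_bool("Off") is False
assert yaml1_1_bool("y") is True
```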
diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py
index c228fc3640..07e0e6e5a4 100644
--- a/CPAC/pipeline/test/test_engine.py
+++ b/CPAC/pipeline/test/test_engine.py
@@ -1,154 +1,101 @@
-import os
+# Copyright (C) 2021-2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""Tests for C-PAC pipeline engine."""
+
+from pathlib import Path
import pytest
from CPAC.pipeline.cpac_pipeline import (
build_anat_preproc_stack,
build_workflow,
- connect_pipeline,
- initialize_nipype_wf,
- load_cpac_pipe_config,
-)
-from CPAC.pipeline.engine import (
- ingress_pipeconfig_paths,
- ingress_raw_anat_data,
- ingress_raw_func_data,
- initiate_rpool,
- ResourcePool,
)
+from CPAC.pipeline.engine import ResourcePool
from CPAC.utils.bids_utils import create_cpac_data_config
-
-
-@pytest.mark.skip(reason="not a pytest test")
-def test_ingress_func_raw_data(pipe_config, bids_dir, test_dir):
- sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0]
- cfg = load_cpac_pipe_config(pipe_config)
-
- cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out")
- cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work")
-
- wf = initialize_nipype_wf(cfg, sub_data_dct)
-
- part_id = sub_data_dct["subject_id"]
- ses_id = sub_data_dct["unique_id"]
-
- unique_id = f"{part_id}_{ses_id}"
-
- rpool = ResourcePool(name=unique_id, cfg=cfg)
-
- if "func" in sub_data_dct:
- wf, rpool, diff, blip, fmap_rp_list = ingress_raw_func_data(
- wf, rpool, cfg, sub_data_dct, unique_id, part_id, ses_id
- )
-
- rpool.gather_pipes(wf, cfg, all=True)
-
- wf.run()
-
-
-@pytest.mark.skip(reason="not a pytest test")
-def test_ingress_anat_raw_data(pipe_config, bids_dir, test_dir):
- sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0]
- cfg = load_cpac_pipe_config(pipe_config)
-
- cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out")
- cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work")
-
- wf = initialize_nipype_wf(cfg, sub_data_dct)
-
- part_id = sub_data_dct["subject_id"]
- ses_id = sub_data_dct["unique_id"]
-
- unique_id = f"{part_id}_{ses_id}"
-
- rpool = ResourcePool(name=unique_id, cfg=cfg)
-
- rpool = ingress_raw_anat_data(
- wf, rpool, cfg, sub_data_dct, unique_id, part_id, ses_id
+from CPAC.utils.configuration import Configuration, Preconfiguration
+
+
+def _set_up_test(
+ bids_examples: Path, preconfig: str, tmp_path: Path
+) -> tuple[Configuration, dict]:
+ """Set up `cfg` and `sub_data` for engine tests."""
+ bids_dir = str(bids_examples / "ds051")
+ sub_data = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0]
+ cfg = Preconfiguration(preconfig)
+ cfg.pipeline_setup["output_directory"]["path"] = str(tmp_path / "out")
+ cfg.pipeline_setup["working_directory"]["path"] = str(tmp_path / "work")
+ cfg.pipeline_setup["log_directory"]["path"] = str(tmp_path / "logs")
+ return cfg, sub_data
+
+
+@pytest.mark.parametrize("preconfig", ["default"])
+def test_ingress_func_raw_data(
+ bids_examples: Path, preconfig: str, tmp_path: Path
+) -> None:
+ """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_func_data` ."""
+ cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path)
+ rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct)
+ rpool.gather_pipes(rpool.wf, cfg, all_types=True)
+
+
+@pytest.mark.parametrize("preconfig", ["default"])
+def test_ingress_anat_raw_data(
+ bids_examples: Path, preconfig: str, tmp_path: Path
+) -> None:
+ """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_anat_data` ."""
+ cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path)
+ rpool = ResourcePool(
+ cfg=cfg,
+ data_paths=sub_data_dct,
)
+ rpool.ingress_raw_anat_data()
+ rpool.gather_pipes(rpool.wf, cfg, all_types=True)
+
+
+@pytest.mark.parametrize("preconfig", ["default"])
+def test_ingress_pipeconfig_data(
+ bids_examples: Path, preconfig: str, tmp_path: Path
+) -> None:
+ """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_pipeconfig_paths` ."""
+ cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path)
+ rpool = ResourcePool(
+ cfg=cfg,
+ data_paths=sub_data_dct,
+ )
+ rpool.gather_pipes(rpool.wf, cfg, all_types=True)
- rpool.gather_pipes(wf, cfg, all=True)
-
- wf.run()
-
-
-@pytest.mark.skip(reason="not a pytest test")
-def test_ingress_pipeconfig_data(pipe_config, bids_dir, test_dir):
- sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0]
- cfg = load_cpac_pipe_config(pipe_config)
-
- cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out")
- cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work")
- cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs")
-
- wf = initialize_nipype_wf(cfg, sub_data_dct)
-
- part_id = sub_data_dct["subject_id"]
- ses_id = sub_data_dct["unique_id"]
-
- unique_id = f"{part_id}_{ses_id}"
-
- rpool = ResourcePool(name=unique_id, cfg=cfg)
-
- rpool = ingress_pipeconfig_paths(cfg, rpool, sub_data_dct, unique_id)
-
- rpool.gather_pipes(wf, cfg, all=True)
-
- wf.run()
-
-
-@pytest.mark.skip(reason="not a pytest test")
-def test_build_anat_preproc_stack(pipe_config, bids_dir, test_dir):
- sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0]
- cfg = load_cpac_pipe_config(pipe_config)
-
- cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out")
- cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work")
- cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs")
-
- wf = initialize_nipype_wf(cfg, sub_data_dct)
- wf, rpool = initiate_rpool(wf, cfg, sub_data_dct)
+@pytest.mark.parametrize("preconfig", ["anat-only"])
+def test_build_anat_preproc_stack(
+ bids_examples: Path, preconfig: str, tmp_path: Path
+) -> None:
+ """Test :py:func:`~CPAC.pipeline.cpac_pipeline.build_anat_preproc_stack` ."""
+ cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path)
+ rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct)
pipeline_blocks = build_anat_preproc_stack(rpool, cfg)
- wf = connect_pipeline(wf, cfg, rpool, pipeline_blocks)
-
+ wf = rpool.connect_pipeline(rpool.wf, cfg, pipeline_blocks)
rpool.gather_pipes(wf, cfg)
- wf.run()
-
-
-@pytest.mark.skip(reason="not a pytest test")
-def test_build_workflow(pipe_config, bids_dir, test_dir):
- sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0]
- cfg = load_cpac_pipe_config(pipe_config)
-
- cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out")
- cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work")
- cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs")
-
- wf = initialize_nipype_wf(cfg, sub_data_dct)
-
- wf, rpool = initiate_rpool(wf, cfg, sub_data_dct)
-
- wf, _, _ = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg)
+@pytest.mark.parametrize("preconfig", ["default"])
+def test_build_workflow(bids_examples: Path, preconfig: str, tmp_path: Path) -> None:
+ """Test :py:func:`~CPAC.pipeline.cpac_pipeline.build_workflow` ."""
+ cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path)
+ rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct)
+ wf = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg)
rpool.gather_pipes(wf, cfg)
-
- wf.run()
-
-
-# bids_dir = "/Users/steven.giavasis/data/HBN-SI_dataset/rawdata"
-# test_dir = "/test_dir"
-
-# cfg = "/Users/hecheng.jin/GitHub/DevBranch/CPAC/resources/configs/pipeline_config_monkey-ABCD.yml"
-cfg = "/Users/hecheng.jin/GitHub/pipeline_config_monkey-ABCDlocal.yml"
-bids_dir = "/Users/hecheng.jin/Monkey/monkey_data_oxford/site-ucdavis"
-test_dir = "/Users/hecheng.jin/GitHub/Test/T2preproc"
-
-# test_ingress_func_raw_data(cfg, bids_dir, test_dir)
-# test_ingress_anat_raw_data(cfg, bids_dir, test_dir)
-# test_ingress_pipeconfig_data(cfg, bids_dir, test_dir)
-# test_build_anat_preproc_stack(cfg, bids_dir, test_dir)
-if __name__ == "__main__":
- test_build_workflow(cfg, bids_dir, test_dir)
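The rewritten tests above assume a `bids_examples` fixture (re-exported in `dev/circleci_data/conftest.py` later in this diff) that points at a local copy of the bids-standard/bids-examples dataset. Below is a hypothetical sketch of such a fixture, assuming `git` is available on the test host; C-PAC's actual fixture in `CPAC/conftest.py` may cache or locate the data differently.

```python
# Hypothetical bids_examples fixture; the real one lives in CPAC/conftest.py.
from pathlib import Path
import subprocess

import pytest


@pytest.fixture(scope="session")
def bids_examples(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Clone bids-standard/bids-examples once per test session."""
    target = tmp_path_factory.mktemp("data") / "bids-examples"
    subprocess.run(
        [
            "git", "clone", "--depth", "1",
            "https://github.com/bids-standard/bids-examples.git",
            str(target),
        ],
        check=True,
    )
    return target
```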
diff --git a/CPAC/pipeline/utils.py b/CPAC/pipeline/utils.py
index 39acb6429f..6f6953fef2 100644
--- a/CPAC/pipeline/utils.py
+++ b/CPAC/pipeline/utils.py
@@ -18,31 +18,16 @@
from itertools import chain
-from CPAC.func_preproc.func_motion import motion_estimate_filter
from CPAC.utils.bids_utils import insert_entity
+from CPAC.utils.configuration.configuration import Configuration
-MOVEMENT_FILTER_KEYS = motion_estimate_filter.outputs
+def name_fork(
+ resource_idx: str, cfg: Configuration, json_info: dict, out_dct: dict
+) -> tuple[str, dict]:
+ """Create and insert entities for forkpoints."""
+ from CPAC.func_preproc.func_motion import motion_estimate_filter
-def name_fork(resource_idx, cfg, json_info, out_dct):
- """Create and insert entities for forkpoints.
-
- Parameters
- ----------
- resource_idx : str
-
- cfg : CPAC.utils.configuration.Configuration
-
- json_info : dict
-
- out_dct : dict
-
- Returns
- -------
- resource_idx : str
-
- out_dct : dict
- """
if cfg.switch_is_on(
[
"functional_preproc",
@@ -54,7 +39,7 @@ def name_fork(resource_idx, cfg, json_info, out_dct):
filt_value = None
_motion_variant = {
_key: json_info["CpacVariant"][_key]
- for _key in MOVEMENT_FILTER_KEYS
+ for _key in motion_estimate_filter.outputs
if _key in json_info.get("CpacVariant", {})
}
if "unfiltered-" in resource_idx:
@@ -105,12 +90,6 @@ def present_outputs(outputs: dict, keys: list) -> dict:
NodeBlocks that differ only by configuration options and relevant
output keys.
- Parameters
- ----------
- outputs : dict
-
- keys : list of str
-
Returns
-------
dict
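Moving the `motion_estimate_filter` import inside `name_fork` defers it to call time, a common way to break an import cycle between modules. A minimal sketch of that pattern follows; the wrapper function is hypothetical, while the deferred import mirrors the one shown in the hunk above.

```python
# Sketch of a deferred (function-local) import used to avoid a circular
# dependency; the wrapper function name is hypothetical.
def filter_outputs_present(json_info: dict) -> list[str]:
    """Return motion-filter output keys recorded in a resource's CpacVariant."""
    # Imported here, not at module top level, so importing this module does not
    # itself pull in CPAC.func_preproc.func_motion.
    from CPAC.func_preproc.func_motion import motion_estimate_filter

    variant = json_info.get("CpacVariant", {})
    return [key for key in motion_estimate_filter.outputs if key in variant]
```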
diff --git a/CPAC/qc/pipeline.py b/CPAC/qc/pipeline.py
index 15d6b35e09..fd39ed5193 100644
--- a/CPAC/qc/pipeline.py
+++ b/CPAC/qc/pipeline.py
@@ -1,7 +1,7 @@
import pkg_resources as p
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.qc.qc import (
afni_Edge3,
create_montage,
diff --git a/CPAC/qc/xcp.py b/CPAC/qc/xcp.py
index 95cb870430..61bb008a0e 100644
--- a/CPAC/qc/xcp.py
+++ b/CPAC/qc/xcp.py
@@ -67,13 +67,15 @@
import pandas as pd
import nibabel as nib
from nipype.interfaces import afni, fsl
+from nipype.pipeline.engine import Node, Workflow
from CPAC.generate_motion_statistics.generate_motion_statistics import (
DVARS_strip_t0,
ImageTo1D,
)
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
+from CPAC.pipeline.engine.resource import StratPool
from CPAC.qc.qcmetrics import regisQ
from CPAC.utils.interfaces.function import Function
@@ -85,33 +87,29 @@
]
-def _connect_motion(wf, nodes, strat_pool, qc_file, pipe_num):
+def _connect_motion(
+ wf: Workflow, nodes: dict, strat_pool: StratPool, qc_file: Node, pipe_num: int
+) -> Workflow:
"""
Connect the motion metrics to the workflow.
Parameters
----------
- wf : nipype.pipeline.engine.Workflow
+ wf
The workflow to connect the motion metrics to.
- nodes : dict
+ nodes
Dictionary of nodes already collected from the strategy pool.
- strat_pool : CPAC.pipeline.engine.ResourcePool
+ strat_pool
The current strategy pool.
- qc_file : nipype.pipeline.engine.Node
- A function node with the function ``generate_xcp_qc``.
-
- pipe_num : int
-
- Returns
- -------
- wf : nipype.pipeline.engine.Workflow
+ qc_file
+ A function node with the function :py:func:`generate_xcp_qc` .
"""
# pylint: disable=invalid-name, too-many-arguments
try:
- nodes = {**nodes, "censor-indices": strat_pool.node_data("censor-indices")}
+ nodes = {**nodes, "censor-indices": strat_pool.get_data("censor-indices")}
wf.connect(
nodes["censor-indices"].node,
nodes["censor-indices"].out,
@@ -501,7 +499,7 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None):
)
qc_file.inputs.desc = "preproc"
qc_file.inputs.regressors = (
- strat_pool.node_data("regressors")
+ strat_pool.get_data("regressors")
.node.name.split("regressors_")[-1][::-1]
.split("_", 1)[-1][::-1]
)
@@ -511,7 +509,7 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None):
op_string="-bin ",
)
nodes = {
- key: strat_pool.node_data(key)
+ key: strat_pool.get_data(key)
for key in [
"bold",
"desc-preproc_bold",
@@ -526,13 +524,13 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None):
]
if strat_pool.check_rpool(key)
}
- nodes["bold2template_mask"] = strat_pool.node_data(
+ nodes["bold2template_mask"] = strat_pool.get_data(
["space-template_desc-bold_mask", "space-EPItemplate_desc-bold_mask"]
)
- nodes["template_mask"] = strat_pool.node_data(
+ nodes["template_mask"] = strat_pool.get_data(
["T1w-brain-template-mask", "EPI-template-mask"]
)
- nodes["template"] = strat_pool.node_data(
+ nodes["template"] = strat_pool.get_data(
["T1w-brain-template-funcreg", "EPI-brain-template-funcreg"]
)
resample_bold_mask_to_template = pe.Node(
diff --git a/CPAC/registration/registration.py b/CPAC/registration/registration.py
index da63e694e4..3673b267cf 100644
--- a/CPAC/registration/registration.py
+++ b/CPAC/registration/registration.py
@@ -17,7 +17,7 @@
# pylint: disable=too-many-lines,ungrouped-imports,wrong-import-order
"""Workflows for registration."""
-from typing import Optional
+from typing import Optional, TYPE_CHECKING
from voluptuous import RequiredFieldInvalid
from nipype.interfaces import afni, ants, c3, fsl, utility as util
@@ -26,7 +26,7 @@
from CPAC.anat_preproc.lesion_preproc import create_lesion_preproc
from CPAC.func_preproc.utils import chunk_ts, split_ts_chunks
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.registration.utils import (
change_itk_transform_type,
check_transforms,
@@ -39,10 +39,14 @@
seperate_warps_list,
single_ants_xfm_to_list,
)
+from CPAC.utils.configuration.configuration import Configuration
from CPAC.utils.interfaces import Function
from CPAC.utils.interfaces.fsl import Merge as fslMerge
from CPAC.utils.utils import check_prov_for_motion_tool, check_prov_for_regtool
+if TYPE_CHECKING:
+ from CPAC.pipeline.engine.resource import StratPool
+
def apply_transform(
wf_name,
@@ -2616,7 +2620,7 @@ def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None):
node, out = connect
wf.connect(node, out, ants_rc, "inputspec.input_brain")
- t1w_brain_template = strat_pool.node_data("T1w-brain-template")
+ t1w_brain_template = strat_pool.get_data("T1w-brain-template")
wf.connect(
t1w_brain_template.node,
t1w_brain_template.out,
@@ -2635,10 +2639,10 @@ def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None):
)
wf.connect(node, out, ants_rc, "inputspec.input_head")
- t1w_template = strat_pool.node_data("T1w-template")
+ t1w_template = strat_pool.get_data("T1w-template")
wf.connect(t1w_template.node, t1w_template.out, ants_rc, "inputspec.reference_head")
- brain_mask = strat_pool.node_data(
+ brain_mask = strat_pool.get_data(
[
"space-T1w_desc-brain_mask",
"space-longitudinal_desc-brain_mask",
@@ -5416,8 +5420,8 @@ def warp_tissuemask_to_template(wf, cfg, strat_pool, pipe_num, xfm, template_spa
def warp_resource_to_template(
wf: pe.Workflow,
- cfg,
- strat_pool,
+ cfg: Configuration,
+ strat_pool: "StratPool",
pipe_num: int,
input_resource: list[str] | str,
xfm: str,
@@ -5428,24 +5432,24 @@ def warp_resource_to_template(
Parameters
----------
- wf : pe.Workflow
+ wf
- cfg : CPAC.utils.configuration.Configuration
+ cfg
- strat_pool : CPAC.pipeline.engine.ResourcePool
+ strat_pool
- pipe_num : int
+ pipe_num
- input_resource : str or list
+ input_resource
key for the resource to warp to template
- xfm : str
+ xfm
key for the transform to apply
- reference : str, optional
+ reference
key for reference if not using f'{template_space}-template'
- time_series : boolean, optional
+ time_series
resource to transform is 4D?
Returns
diff --git a/CPAC/reho/reho.py b/CPAC/reho/reho.py
index 870d3fa36d..b19ad9ecc7 100644
--- a/CPAC/reho/reho.py
+++ b/CPAC/reho/reho.py
@@ -18,7 +18,7 @@
import nipype.interfaces.utility as util
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.reho.utils import *
from CPAC.utils.interfaces import Function
diff --git a/CPAC/resources/tests/test_templates.py b/CPAC/resources/tests/test_templates.py
index 13a4f72745..0c70370f7f 100644
--- a/CPAC/resources/tests/test_templates.py
+++ b/CPAC/resources/tests/test_templates.py
@@ -16,27 +16,37 @@
# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
"""Tests for packaged templates."""
+from importlib.util import find_spec
import os
import pytest
from CPAC.pipeline import ALL_PIPELINE_CONFIGS
-from CPAC.pipeline.engine import ingress_pipeconfig_paths, ResourcePool
+from CPAC.pipeline.engine import ResourcePool
from CPAC.utils.configuration import Preconfiguration
from CPAC.utils.datasource import get_highest_local_res
-@pytest.mark.parametrize("pipeline", ALL_PIPELINE_CONFIGS)
+@pytest.mark.parametrize(
+ "pipeline",
+ [
+ pytest.param(
+ config,
+ marks=pytest.mark.skipif(
+ not find_spec("torch"), reason="torch required for NHP configs."
+ ),
+ )
+ if config in ["monkey", "nhp-macaque"]
+ else config
+ for config in ALL_PIPELINE_CONFIGS
+ ],
+)
def test_packaged_path_exists(pipeline):
- """
- Check that all local templates are included in image at at
- least one resolution.
- """
- rpool = ingress_pipeconfig_paths(
- Preconfiguration(pipeline), ResourcePool(), "pytest"
- )
+ """Check that all local templates are included in at least one resolution."""
+ rpool = ResourcePool(cfg=Preconfiguration(pipeline), part_id="pytest")
+ rpool.ingress_pipeconfig_paths()
for resource in rpool.rpool.values():
- node = next(iter(resource.values())).get("data")[0]
+ node = next(iter(resource.values())).data[0]
if hasattr(node.inputs, "template") and not node.inputs.template.startswith(
"s3:"
):
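The parametrization above attaches a `skipif` mark only to the NHP configs. As a generic illustration of that pytest pattern (unrelated to C-PAC's configs), conditionally marked cases can be mixed with plain ones in a single `parametrize` call:

```python
# Generic illustration of per-case skipif marks inside pytest.mark.parametrize.
from importlib.util import find_spec

import pytest

CASES = ["plain", "needs_numpy"]


@pytest.mark.parametrize(
    "case",
    [
        pytest.param(
            case,
            marks=pytest.mark.skipif(
                not find_spec("numpy"), reason="numpy required for this case."
            ),
        )
        if case == "needs_numpy"
        else case
        for case in CASES
    ],
)
def test_case(case: str) -> None:
    assert isinstance(case, str)
```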
diff --git a/CPAC/sca/sca.py b/CPAC/sca/sca.py
index d12aae7de9..bf855d578a 100644
--- a/CPAC/sca/sca.py
+++ b/CPAC/sca/sca.py
@@ -18,7 +18,7 @@
from nipype.interfaces.afni import preprocess
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.sca.utils import *
from CPAC.timeseries.timeseries_analysis import (
get_roi_timeseries,
diff --git a/CPAC/seg_preproc/seg_preproc.py b/CPAC/seg_preproc/seg_preproc.py
index f769cf14b3..1fe3f4045f 100644
--- a/CPAC/seg_preproc/seg_preproc.py
+++ b/CPAC/seg_preproc/seg_preproc.py
@@ -19,7 +19,7 @@
from CPAC.anat_preproc.utils import mri_convert
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.registration.registration import apply_transform
from CPAC.registration.utils import check_transforms, generate_inverse_transform_flags
from CPAC.seg_preproc.utils import (
diff --git a/CPAC/surface/surf_preproc.py b/CPAC/surface/surf_preproc.py
index 1defe4e2d1..7959688f86 100644
--- a/CPAC/surface/surf_preproc.py
+++ b/CPAC/surface/surf_preproc.py
@@ -17,7 +17,7 @@
import os
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.surface.PostFreeSurfer.surf_reho import run_surf_reho
from CPAC.utils.interfaces import Function
diff --git a/CPAC/timeseries/timeseries_analysis.py b/CPAC/timeseries/timeseries_analysis.py
index a56bc33c74..18b1a4851a 100644
--- a/CPAC/timeseries/timeseries_analysis.py
+++ b/CPAC/timeseries/timeseries_analysis.py
@@ -22,7 +22,7 @@
get_connectome_method,
)
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.utils.datasource import (
create_roi_mask_dataflow,
create_spatial_map_dataflow,
diff --git a/CPAC/utils/bids_utils.py b/CPAC/utils/bids_utils.py
index 34e72d430e..08e6edb989 100755
--- a/CPAC/utils/bids_utils.py
+++ b/CPAC/utils/bids_utils.py
@@ -14,6 +14,9 @@
# You should have received a copy of the GNU Lesser General Public
# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""Utilities for using BIDS data."""
+
+from base64 import b64decode
import json
import os
import re
@@ -91,8 +94,7 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True):
)
if raise_error:
raise ValueError(msg)
- else:
- UTLOGGER.error(msg)
+ UTLOGGER.error(msg)
elif not f_dict["scantype"]:
msg = (
f"Filename ({fname}) does not appear to contain"
@@ -100,8 +102,7 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True):
)
if raise_error:
raise ValueError(msg)
- else:
- UTLOGGER.error(msg)
+ UTLOGGER.error(msg)
elif "bold" in f_dict["scantype"] and not f_dict["task"]:
msg = (
f"Filename ({fname}) is a BOLD file, but doesn't contain a task, does"
@@ -109,15 +110,13 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True):
)
if raise_error:
raise ValueError(msg)
- else:
- UTLOGGER.error(msg)
+ UTLOGGER.error(msg)
return f_dict
def bids_entities_from_filename(filename):
- """Function to collect a list of BIDS entities from a given
- filename.
+ """Collect a list of BIDS entities from a given filename.
Parameters
----------
@@ -142,7 +141,7 @@ def bids_entities_from_filename(filename):
def bids_match_entities(file_list, entities, suffix):
- """Function to subset a list of filepaths by a passed BIDS entity.
+ """Subset a list of filepaths by a passed BIDS entity.
Parameters
----------
@@ -250,10 +249,9 @@ def bids_remove_entity(name, key):
def bids_retrieve_params(bids_config_dict, f_dict, dbg=False):
- """
+ """Retrieve BIDS parameters for BIDS file corresponding to f_dict.
- Retrieve the BIDS parameters from bids_config_dict for BIDS file
- corresponding to f_dict. If an exact match for f_dict is not found
+ If an exact match for f_dict is not found
the nearest match is returned, corresponding to the BIDS inheritance
principle.
@@ -316,12 +314,10 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False):
return params
-def bids_parse_sidecar(config_dict, dbg=False, raise_error=True):
- # type: (dict, bool) -> dict
- """
- Uses the BIDS principle of inheritance to build a data structure that
- maps parameters in side car .json files to components in the names of
- corresponding nifti files.
+def bids_parse_sidecar(
+ config_dict: dict, dbg: bool = False, raise_error: bool = True
+) -> dict:
+ """Use BIDS inheritance to map parameters in sidecar to corresponding NIfTI files.
:param config_dict: dictionary that maps paths of sidecar json files
(the key) to a dictionary containing the contents of the files (the values)
@@ -428,9 +424,9 @@ def bids_parse_sidecar(config_dict, dbg=False, raise_error=True):
def bids_shortest_entity(file_list):
- """Function to return the single file with the shortest chain of
- BIDS entities from a given list, returning the first if more than
- one have the same minimum length.
+ """Return the single file with the shortest chain of BIDS entities from a list.
+
+ Return the first if more than one have the same minimum length.
Parameters
----------
@@ -553,9 +549,7 @@ def bids_gen_cpac_sublist(
raise_error=True,
only_one_anat=True,
):
- """
- Generates a CPAC formatted subject list from information contained in a
- BIDS formatted set of data.
+ """Generate a CPAC formatted subject list from a BIDS dataset.
Parameters
----------
@@ -910,8 +904,9 @@ def camelCase(string: str) -> str: # pylint: disable=invalid-name
def combine_multiple_entity_instances(bids_str: str) -> str:
- """Combines mutliple instances of a key in a BIDS string to a single
- instance by camelCasing and concatenating the values.
+ """Combine mutliple instances of a key in a BIDS string to a single instance.
+
+ camelCase and concatenate the values.
Parameters
----------
@@ -950,8 +945,7 @@ def combine_multiple_entity_instances(bids_str: str) -> str:
def insert_entity(resource, key, value):
- """Insert a `f'{key}-{value}'` BIDS entity before `desc-` if
- present or before the suffix otherwise.
+ """Insert a BIDS entity before `desc-` if present or before the suffix otherwise.
Parameters
----------
@@ -983,7 +977,8 @@ def insert_entity(resource, key, value):
return "_".join([*new_entities[0], f"{key}-{value}", *new_entities[1], suff])
-def load_yaml_config(config_filename, aws_input_creds):
+def load_yaml_config(config_filename: str, aws_input_creds: str) -> dict | list:
+ """Load a YAML configuration file, locally or from AWS."""
if config_filename.lower().startswith("data:"):
try:
header, encoded = config_filename.split(",", 1)
@@ -1020,8 +1015,7 @@ def load_yaml_config(config_filename, aws_input_creds):
def cl_strip_brackets(arg_list):
- """Removes '[' from before first and ']' from after final
- arguments in a list of commandline arguments.
+ """Remove '[' from before first and ']' from after final arguments.
Parameters
----------
@@ -1051,7 +1045,7 @@ def create_cpac_data_config(
aws_input_creds=None,
skip_bids_validator=False,
only_one_anat=True,
-):
+) -> list[dict]:
"""
Create a C-PAC data config YAML file from a BIDS directory.
@@ -1111,8 +1105,7 @@ def create_cpac_data_config(
def load_cpac_data_config(data_config_file, participant_labels, aws_input_creds):
- """
- Loads the file as a check to make sure it is available and readable.
+ """Load the file to make sure it is available and readable.
Parameters
----------
@@ -1210,8 +1203,7 @@ def res_in_filename(cfg, label):
def sub_list_filter_by_labels(sub_list, labels):
- """Function to filter a sub_list by provided BIDS labels for
- specified suffixes.
+ """Filter a sub_list by provided BIDS labels for specified suffixes.
Parameters
----------
@@ -1287,7 +1279,7 @@ def without_key(entity: str, key: str) -> str:
def _t1w_filter(anat, shortest_entity, label):
- """Helper function to filter T1w paths.
+ """Filter T1w paths.
Parameters
----------
@@ -1318,7 +1310,7 @@ def _t1w_filter(anat, shortest_entity, label):
def _sub_anat_filter(anat, shortest_entity, label):
- """Helper function to filter anat paths in sub_list.
+ """Filter anat paths in sub_list.
Parameters
----------
@@ -1341,7 +1333,7 @@ def _sub_anat_filter(anat, shortest_entity, label):
def _sub_list_filter_by_label(sub_list, label_type, label):
- """Function to filter a sub_list by a CLI-provided label.
+ """Filter a sub_list by a CLI-provided label.
Parameters
----------
@@ -1410,7 +1402,7 @@ def _sub_list_filter_by_label(sub_list, label_type, label):
def _match_functional_scan(sub_list_func_dict, scan_file_to_match):
- """Function to subset a scan from a sub_list_func_dict by a scan filename.
+ """Subset a scan from a sub_list_func_dict by a scan filename.
Parameters
----------
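The new `b64decode` import and the annotation on `load_yaml_config` relate to the `data:` URI branch visible in its hunk. A rough sketch of that branch follows, with a hypothetical helper name and hedged error handling that is not necessarily C-PAC's.

```python
# Hypothetical helper sketching the data: URI branch of load_yaml_config.
from base64 import b64decode

import yaml


def load_inline_yaml(config_filename: str) -> dict | list:
    """Decode a base64-encoded ``data:`` URI into a YAML document."""
    header, encoded = config_filename.split(",", 1)
    if not header.lower().startswith("data:") or "base64" not in header.lower():
        raise ValueError(f"Unsupported data URI header: {header}")
    return yaml.safe_load(b64decode(encoded).decode("utf-8"))


print(load_inline_yaml("data:text/yaml;base64,a2V5OiB2YWx1ZQ=="))  # {'key': 'value'}
```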
diff --git a/CPAC/utils/datasource.py b/CPAC/utils/datasource.py
index 008e674c2d..8eba26bf21 100644
--- a/CPAC/utils/datasource.py
+++ b/CPAC/utils/datasource.py
@@ -20,6 +20,7 @@
import json
from pathlib import Path
import re
+from typing import Optional
from voluptuous import RequiredFieldInvalid
from nipype.interfaces import utility as util
@@ -30,7 +31,6 @@
from CPAC.utils.bids_utils import bids_remove_entity
from CPAC.utils.interfaces.function import Function
from CPAC.utils.monitoring import FMLOGGER
-from CPAC.utils.utils import get_scan_params
def bidsier_prefix(unique_id):
@@ -64,7 +64,8 @@ def bidsier_prefix(unique_id):
return "_".join(components)
-def get_rest(scan, rest_dict, resource="scan"):
+@Function.sig_imports(["from pathlib import Path"])
+def get_rest(scan: str, rest_dict: dict, resource: str = "scan") -> Path | str:
"""Return the path of the chosen resource in the functional file dictionary.
scan: the scan/series name or label
@@ -127,7 +128,7 @@ def select_model_files(model, ftest, model_name):
return fts_file, con_file, grp_file, mat_file
-def check_func_scan(func_scan_dct, scan):
+def check_func_scan(func_scan_dct: dict, scan: str) -> None:
"""Run some checks on the functional timeseries-related files.
For a given series/scan name or label.
@@ -168,119 +169,6 @@ def check_func_scan(func_scan_dct, scan):
raise ValueError(msg)
-def create_func_datasource(rest_dict, rpool, wf_name="func_datasource"):
- """Return the functional timeseries-related file paths for each series/scan...
-
- ...from the dictionary of functional files described in the data
- configuration (sublist) YAML file.
-
- Scan input (from inputnode) is an iterable.
- """
- import nipype.interfaces.utility as util
-
- from CPAC.pipeline import nipype_pipeline_engine as pe
-
- wf = pe.Workflow(name=wf_name)
-
- inputnode = pe.Node(
- util.IdentityInterface(
- fields=["subject", "scan", "creds_path", "dl_dir"], mandatory_inputs=True
- ),
- name="inputnode",
- )
-
- outputnode = pe.Node(
- util.IdentityInterface(
- fields=["subject", "rest", "scan", "scan_params", "phase_diff", "magnitude"]
- ),
- name="outputspec",
- )
-
- # have this here for now because of the big change in the data
- # configuration format
- # (Not necessary with ingress - format does not comply)
- if not rpool.check_rpool("derivatives-dir"):
- check_scan = pe.Node(
- function.Function(
- input_names=["func_scan_dct", "scan"],
- output_names=[],
- function=check_func_scan,
- as_module=True,
- ),
- name="check_func_scan",
- )
-
- check_scan.inputs.func_scan_dct = rest_dict
- wf.connect(inputnode, "scan", check_scan, "scan")
-
- # get the functional scan itself
- selectrest = pe.Node(
- function.Function(
- input_names=["scan", "rest_dict", "resource"],
- output_names=["file_path"],
- function=get_rest,
- as_module=True,
- ),
- name="selectrest",
- )
- selectrest.inputs.rest_dict = rest_dict
- selectrest.inputs.resource = "scan"
- wf.connect(inputnode, "scan", selectrest, "scan")
-
- # check to see if it's on an Amazon AWS S3 bucket, and download it, if it
- # is - otherwise, just return the local file path
- check_s3_node = pe.Node(
- function.Function(
- input_names=["file_path", "creds_path", "dl_dir", "img_type"],
- output_names=["local_path"],
- function=check_for_s3,
- as_module=True,
- ),
- name="check_for_s3",
- )
-
- wf.connect(selectrest, "file_path", check_s3_node, "file_path")
- wf.connect(inputnode, "creds_path", check_s3_node, "creds_path")
- wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir")
- check_s3_node.inputs.img_type = "func"
-
- wf.connect(inputnode, "subject", outputnode, "subject")
- wf.connect(check_s3_node, "local_path", outputnode, "rest")
- wf.connect(inputnode, "scan", outputnode, "scan")
-
- # scan parameters CSV
- select_scan_params = pe.Node(
- function.Function(
- input_names=["scan", "rest_dict", "resource"],
- output_names=["file_path"],
- function=get_rest,
- as_module=True,
- ),
- name="select_scan_params",
- )
- select_scan_params.inputs.rest_dict = rest_dict
- select_scan_params.inputs.resource = "scan_parameters"
- wf.connect(inputnode, "scan", select_scan_params, "scan")
-
- # if the scan parameters file is on AWS S3, download it
- s3_scan_params = pe.Node(
- function.Function(
- input_names=["file_path", "creds_path", "dl_dir", "img_type"],
- output_names=["local_path"],
- function=check_for_s3,
- as_module=True,
- ),
- name="s3_scan_params",
- )
-
- wf.connect(select_scan_params, "file_path", s3_scan_params, "file_path")
- wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path")
- wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir")
- wf.connect(s3_scan_params, "local_path", outputnode, "scan_params")
-
- return wf
-
-
def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"):
"""Return the field map files...
@@ -374,7 +262,7 @@ def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"):
return wf
-def get_fmap_phasediff_metadata(data_config_scan_params):
+def get_fmap_phasediff_metadata(data_config_scan_params: dict | str):
"""Return the scan parameters for a field map phasediff scan."""
if (
not isinstance(data_config_scan_params, dict)
@@ -513,298 +401,6 @@ def match_epi_fmaps(
return (opposite_pe_epi, same_pe_epi)
-def ingress_func_metadata(
- wf,
- cfg,
- rpool,
- sub_dict,
- subject_id,
- input_creds_path,
- unique_id=None,
- num_strat=None,
-):
- """Ingress metadata for functional scans."""
- name_suffix = ""
- for suffix_part in (unique_id, num_strat):
- if suffix_part is not None:
- name_suffix += f"_{suffix_part}"
- # Grab field maps
- diff = False
- blip = False
- fmap_rp_list = []
- fmap_TE_list = []
- if "fmap" in sub_dict:
- second = False
- for orig_key in sub_dict["fmap"]:
- gather_fmap = create_fmap_datasource(
- sub_dict["fmap"], f"fmap_gather_{orig_key}_{subject_id}"
- )
- gather_fmap.inputs.inputnode.set(
- subject=subject_id,
- creds_path=input_creds_path,
- dl_dir=cfg.pipeline_setup["working_directory"]["path"],
- )
- gather_fmap.inputs.inputnode.scan = orig_key
-
- key = orig_key
- if "epi" in key and not second:
- key = "epi-1"
- second = True
- elif "epi" in key and second:
- key = "epi-2"
-
- rpool.set_data(key, gather_fmap, "outputspec.rest", {}, "", "fmap_ingress")
- rpool.set_data(
- f"{key}-scan-params",
- gather_fmap,
- "outputspec.scan_params",
- {},
- "",
- "fmap_params_ingress",
- )
-
- fmap_rp_list.append(key)
-
- get_fmap_metadata_imports = ["import json"]
- get_fmap_metadata = pe.Node(
- Function(
- input_names=["data_config_scan_params"],
- output_names=[
- "dwell_time",
- "pe_direction",
- "total_readout",
- "echo_time",
- "echo_time_one",
- "echo_time_two",
- ],
- function=get_fmap_phasediff_metadata,
- imports=get_fmap_metadata_imports,
- ),
- name=f"{key}_get_metadata{name_suffix}",
- )
-
- wf.connect(
- gather_fmap,
- "outputspec.scan_params",
- get_fmap_metadata,
- "data_config_scan_params",
- )
-
- if "phase" in key:
- # leave it open to all three options, in case there is a
- # phasediff image with either a single EchoTime field (which
- # usually matches one of the magnitude EchoTimes), OR
- # a phasediff with an EchoTime1 and EchoTime2
-
- # at least one of these rpool keys will have a None value,
- # which will be sorted out in gather_echo_times below
- rpool.set_data(
- f"{key}-TE",
- get_fmap_metadata,
- "echo_time",
- {},
- "",
- "fmap_TE_ingress",
- )
- fmap_TE_list.append(f"{key}-TE")
-
- rpool.set_data(
- f"{key}-TE1",
- get_fmap_metadata,
- "echo_time_one",
- {},
- "",
- "fmap_TE1_ingress",
- )
- fmap_TE_list.append(f"{key}-TE1")
-
- rpool.set_data(
- f"{key}-TE2",
- get_fmap_metadata,
- "echo_time_two",
- {},
- "",
- "fmap_TE2_ingress",
- )
- fmap_TE_list.append(f"{key}-TE2")
-
- elif "magnitude" in key:
- rpool.set_data(
- f"{key}-TE",
- get_fmap_metadata,
- "echo_time",
- {},
- "",
- "fmap_TE_ingress",
- )
- fmap_TE_list.append(f"{key}-TE")
-
- rpool.set_data(
- f"{key}-dwell",
- get_fmap_metadata,
- "dwell_time",
- {},
- "",
- "fmap_dwell_ingress",
- )
- rpool.set_data(
- f"{key}-pedir",
- get_fmap_metadata,
- "pe_direction",
- {},
- "",
- "fmap_pedir_ingress",
- )
- rpool.set_data(
- f"{key}-total-readout",
- get_fmap_metadata,
- "total_readout",
- {},
- "",
- "fmap_readout_ingress",
- )
-
- if "phase" in key or "mag" in key:
- diff = True
-
- if re.match("epi_[AP]{2}", orig_key):
- blip = True
-
- if diff:
- calc_delta_ratio = pe.Node(
- Function(
- input_names=["effective_echo_spacing", "echo_times"],
- output_names=["deltaTE", "ees_asym_ratio"],
- function=calc_delta_te_and_asym_ratio,
- imports=["from typing import Optional"],
- ),
- name=f"diff_distcor_calc_delta{name_suffix}",
- )
-
- gather_echoes = pe.Node(
- Function(
- input_names=[
- "echotime_1",
- "echotime_2",
- "echotime_3",
- "echotime_4",
- ],
- output_names=["echotime_list"],
- function=gather_echo_times,
- ),
- name="fugue_gather_echo_times",
- )
-
- for idx, fmap_file in enumerate(fmap_TE_list, start=1):
- try:
- node, out_file = rpool.get(fmap_file)[
- f"['{fmap_file}:fmap_TE_ingress']"
- ]["data"]
- wf.connect(node, out_file, gather_echoes, f"echotime_{idx}")
- except KeyError:
- pass
-
- wf.connect(gather_echoes, "echotime_list", calc_delta_ratio, "echo_times")
-
- # Add in nodes to get parameters from configuration file
- # a node which checks if scan_parameters are present for each scan
- scan_params = pe.Node(
- Function(
- input_names=[
- "data_config_scan_params",
- "subject_id",
- "scan",
- "pipeconfig_tr",
- "pipeconfig_tpattern",
- "pipeconfig_start_indx",
- "pipeconfig_stop_indx",
- ],
- output_names=[
- "tr",
- "tpattern",
- "template",
- "ref_slice",
- "start_indx",
- "stop_indx",
- "pe_direction",
- "effective_echo_spacing",
- ],
- function=get_scan_params,
- ),
- name=f"bold_scan_params_{subject_id}{name_suffix}",
- )
- scan_params.inputs.subject_id = subject_id
- scan_params.inputs.set(
- pipeconfig_start_indx=cfg.functional_preproc["truncation"]["start_tr"],
- pipeconfig_stop_indx=cfg.functional_preproc["truncation"]["stop_tr"],
- )
-
- node, out = rpool.get("scan")["['scan:func_ingress']"]["data"]
- wf.connect(node, out, scan_params, "scan")
-
- # Workaround for extracting metadata with ingress
- if rpool.check_rpool("derivatives-dir"):
- selectrest_json = pe.Node(
- function.Function(
- input_names=["scan", "rest_dict", "resource"],
- output_names=["file_path"],
- function=get_rest,
- as_module=True,
- ),
- name="selectrest_json",
- )
- selectrest_json.inputs.rest_dict = sub_dict
- selectrest_json.inputs.resource = "scan_parameters"
- wf.connect(node, out, selectrest_json, "scan")
- wf.connect(selectrest_json, "file_path", scan_params, "data_config_scan_params")
-
- else:
- # wire in the scan parameter workflow
- node, out = rpool.get("scan-params")["['scan-params:scan_params_ingress']"][
- "data"
- ]
- wf.connect(node, out, scan_params, "data_config_scan_params")
-
- rpool.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress")
- rpool.set_data("tpattern", scan_params, "tpattern", {}, "", "func_metadata_ingress")
- rpool.set_data("template", scan_params, "template", {}, "", "func_metadata_ingress")
- rpool.set_data(
- "start-tr", scan_params, "start_indx", {}, "", "func_metadata_ingress"
- )
- rpool.set_data("stop-tr", scan_params, "stop_indx", {}, "", "func_metadata_ingress")
- rpool.set_data(
- "pe-direction", scan_params, "pe_direction", {}, "", "func_metadata_ingress"
- )
-
- if diff:
- # Connect EffectiveEchoSpacing from functional metadata
- rpool.set_data(
- "effectiveEchoSpacing",
- scan_params,
- "effective_echo_spacing",
- {},
- "",
- "func_metadata_ingress",
- )
- node, out_file = rpool.get("effectiveEchoSpacing")[
- "['effectiveEchoSpacing:func_metadata_ingress']"
- ]["data"]
- wf.connect(node, out_file, calc_delta_ratio, "effective_echo_spacing")
- rpool.set_data(
- "deltaTE", calc_delta_ratio, "deltaTE", {}, "", "deltaTE_ingress"
- )
- rpool.set_data(
- "ees-asym-ratio",
- calc_delta_ratio,
- "ees_asym_ratio",
- {},
- "",
- "ees_asym_ratio_ingress",
- )
-
- return wf, rpool, diff, blip, fmap_rp_list
-
-
def create_general_datasource(wf_name):
"""Create a general-purpose datasource node."""
import nipype.interfaces.utility as util
@@ -880,9 +476,16 @@ def create_check_for_s3_node(
return check_s3_node
+@function.Function.sig_imports(
+ ["from pathlib import Path", "from typing import Optional"]
+)
def check_for_s3(
- file_path, creds_path=None, dl_dir=None, img_type="other", verbose=False
-):
+ file_path: Path | str,
+ creds_path: Optional[Path | str] = None,
+ dl_dir: Optional[Path | str] = None,
+ img_type: str = "other",
+ verbose: bool = False,
+) -> Path | str:
"""Check if passed-in file is on S3."""
# Import packages
import os
diff --git a/CPAC/utils/interfaces/function/function.py b/CPAC/utils/interfaces/function/function.py
index 34d01373d5..2df6741717 100644
--- a/CPAC/utils/interfaces/function/function.py
+++ b/CPAC/utils/interfaces/function/function.py
@@ -156,28 +156,28 @@ class Function(NipypeFunction):
def __init__(
self,
- input_names=None,
- output_names="out",
- function=None,
- imports=None,
- as_module=False,
+ input_names: Optional[str | list[str]] = None,
+ output_names: Optional[str | list[str]] = "out",
+ function: Optional[Callable] = None,
+ imports: Optional[list[str]] = None,
+ as_module: bool = False,
**inputs,
):
- """Initialize a :py:func`~CPAC.utils.interfaces.function.Function` interface.
+ """Initialize a :py:func:`~CPAC.utils.interfaces.function.Function` interface.
Parameters
----------
- input_names : single str or list or None
+ input_names
names corresponding to function inputs
if ``None``, derive input names from function argument names
- output_names : single str or list
+ output_names
names corresponding to function outputs (default: 'out').
if list of length > 1, has to match the number of outputs
- function : callable
+ function
callable python object. must be able to execute in an
- isolated namespace (possibly in concert with the ``imports``
+ isolated namespace (possibly in concert with the `imports`
parameter)
- imports : list of strings
+ imports
list of import statements that allow the function to execute
in an otherwise empty namespace. If these collide with
imports defined via the :py:meth:`Function.sig_imports`
@@ -244,12 +244,11 @@ def sig_imports(imports: list[str]) -> Callable:
Parameters
----------
- imports : list of str
+ imports
import statements to import the function in an otherwise empty
namespace. If these collide with imports defined via the
- :py:meth:`Function.__init__` initialization method, the
- imports given as a parameter here will be overridden by
- those from the initializer.
+ :py:meth:`Function.__init__` method, the imports given as a parameter here
+ will be overridden by those from the initializer.
Returns
-------
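For readers unfamiliar with this interface, here is a hedged usage sketch of `Function` together with `sig_imports`. The decorated helper is hypothetical, and, as with nipype function nodes generally, the declared imports are injected into the function's namespace when the node runs rather than at module import time.

```python
# Hypothetical helper wrapped as a C-PAC Function interface.
from CPAC.pipeline import nipype_pipeline_engine as pe
from CPAC.utils.interfaces.function import Function


@Function.sig_imports(["import os"])
def basename_of(path: str) -> str:
    """Return the final path component; ``os`` comes from the declared imports."""
    return os.path.basename(path)


node = pe.Node(
    Function(input_names=["path"], output_names=["name"], function=basename_of),
    name="basename_of",
)
node.inputs.path = "/tmp/sub-01_T1w.nii.gz"
```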
diff --git a/CPAC/utils/strategy.py b/CPAC/utils/strategy.py
index 67f4de5770..42d6848e9c 100644
--- a/CPAC/utils/strategy.py
+++ b/CPAC/utils/strategy.py
@@ -21,7 +21,7 @@
class Strategy:
def __init__(self):
- self._resource_pool = ResourcePool({})
+ self._resource_pool = ResourcePool()
self.leaf_node = None
self.leaf_out_file = None
self.name = []
@@ -29,9 +29,6 @@ def __init__(self):
def append_name(self, name):
self.name.append(name)
- def get_name(self):
- return self.name
-
def set_leaf_properties(self, node, out_file):
self.leaf_node = node
self.leaf_out_file = out_file
@@ -57,7 +54,7 @@ def get_node_from_resource_pool(self, resource_key):
@property
def resource_pool(self):
"""Strategy's ResourcePool dict."""
- return self._resource_pool.get_entire_rpool()
+ return self._resource_pool.rpool
@property
def rpool(self):
diff --git a/CPAC/utils/tests/test_utils.py b/CPAC/utils/tests/test_utils.py
index ab896c6029..4d8f18dabe 100644
--- a/CPAC/utils/tests/test_utils.py
+++ b/CPAC/utils/tests/test_utils.py
@@ -7,7 +7,7 @@
import pytest
from CPAC.func_preproc import get_motion_ref
-from CPAC.pipeline.nodeblock import NodeBlockFunction
+from CPAC.pipeline.engine.nodeblock import NodeBlockFunction
from CPAC.utils.configuration import Configuration
from CPAC.utils.monitoring.custom_logging import log_subprocess
from CPAC.utils.tests import old_functions
diff --git a/CPAC/utils/typing.py b/CPAC/utils/typing.py
new file mode 100644
index 0000000000..79197dd314
--- /dev/null
+++ b/CPAC/utils/typing.py
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""Type aliases for C-PAC."""
+
+from typing import ForwardRef
+
+LIST_OF_LIST_OF_STR = str | list[ForwardRef("LIST_OF_LIST_OF_STR")]
+# _PIPE_IDX = list[ForwardRef("PIPE_IDX")] | str | tuple[ForwardRef("PIPE_IDX"), ...]
+# PIPE_IDX = TypeVar("PIPE_IDX", bound=_PIPE_IDX)
+PIPE_IDX = list[str | tuple] | str | tuple
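A brief sketch of how these aliases might appear in annotations; the function below is illustrative and not part of C-PAC. Because the aliases are plain assignments, they evaluate normally at runtime and can be imported like any other name.

```python
# Illustrative use of the new aliases in annotations (not C-PAC code).
from CPAC.utils.typing import LIST_OF_LIST_OF_STR, PIPE_IDX


def flatten(prov: LIST_OF_LIST_OF_STR) -> list[str]:
    """Flatten an arbitrarily nested str/list structure into a flat list."""
    if isinstance(prov, str):
        return [prov]
    flat: list[str] = []
    for item in prov:
        flat.extend(flatten(item))
    return flat


idx: PIPE_IDX = ("desc-preproc_bold", "motion_correct")
assert flatten(["a", ["b", ["c"]]]) == ["a", "b", "c"]
```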
diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py
index b459262993..8e179411ae 100644
--- a/CPAC/utils/utils.py
+++ b/CPAC/utils/utils.py
@@ -138,7 +138,7 @@ def get_flag_wf(wf_name="get_flag"):
wf.connect(input_node, "in_flag", get_flag, "in_flag")
-def read_json(json_file):
+def read_json(json_file: str) -> dict:
"""Read a JSON file and return the contents as a dictionary."""
try:
with open(json_file, "r") as f:
@@ -224,6 +224,7 @@ def create_id_string(
return combine_multiple_entity_instances(res_in_filename(cfg, out_filename))
+@Function.sig_imports(["import os", "import json"])
def write_output_json(json_data, filename, indent=3, basedir=None):
"""Write a dictionary to a JSON file."""
if not basedir:
diff --git a/CPAC/vmhc/vmhc.py b/CPAC/vmhc/vmhc.py
index 3c547a8e2f..e09f156dfb 100644
--- a/CPAC/vmhc/vmhc.py
+++ b/CPAC/vmhc/vmhc.py
@@ -3,7 +3,7 @@
from CPAC.image_utils import spatial_smoothing
from CPAC.pipeline import nipype_pipeline_engine as pe
-from CPAC.pipeline.nodeblock import nodeblock
+from CPAC.pipeline.engine.nodeblock import nodeblock
from CPAC.registration.registration import apply_transform
from CPAC.utils.utils import check_prov_for_regtool
from CPAC.vmhc import *
diff --git a/dev/circleci_data/conftest.py b/dev/circleci_data/conftest.py
new file mode 100644
index 0000000000..4d67fdac05
--- /dev/null
+++ b/dev/circleci_data/conftest.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2024 C-PAC Developers
+
+# This file is part of C-PAC.
+
+# C-PAC is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+
+# C-PAC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
+"""Pytest configuration for CircleCI-specific tests."""
+
+from CPAC.conftest import bids_examples
diff --git a/dev/circleci_data/test_external_utils.py b/dev/circleci_data/test_external_utils.py
index f516b0c903..d4892fee3b 100644
--- a/dev/circleci_data/test_external_utils.py
+++ b/dev/circleci_data/test_external_utils.py
@@ -25,9 +25,9 @@
import pytest
import semver
-CPAC_DIR = str(Path(__file__).parent.parent.parent)
-sys.path.append(CPAC_DIR)
-DATA_DIR = os.path.join(CPAC_DIR, "dev", "circleci_data")
+CPAC_DIR = Path(__file__).parent.parent.parent
+sys.path.append(str(CPAC_DIR))
+DATA_DIR = CPAC_DIR / "dev/circleci_data"
from CPAC.__main__ import utils as CPAC_main_utils # noqa: E402
@@ -70,9 +70,8 @@ def test_build_data_config(caplog, cli_runner, multiword_connector):
caplog.set_level(INFO)
if multiword_connector == "-" and _BACKPORT_CLICK:
return
- os.chdir(DATA_DIR)
- test_yaml = os.path.join(DATA_DIR, "data_settings.yml")
- _delete_test_yaml(test_yaml)
+ os.chdir(str(DATA_DIR))
+ test_yaml = DATA_DIR / "data_settings.yml"
if multiword_connector == "_":
data_config = CPAC_main_utils.commands[
_click_backport(CPAC_main_utils, "data-config")
@@ -89,49 +88,50 @@ def test_build_data_config(caplog, cli_runner, multiword_connector):
assert "\n".join(caplog.messages).startswith(
"\nGenerated a default data_settings YAML file for editing"
)
- assert os.path.exists(test_yaml)
+ assert test_yaml.exists()
_delete_test_yaml(test_yaml)
-def test_new_settings_template(caplog, cli_runner):
+def test_new_settings_template(bids_examples, caplog, cli_runner):
"""Test CLI ``utils new-settings-template``."""
caplog.set_level(INFO)
- os.chdir(CPAC_DIR)
-
- example_dir = os.path.join(CPAC_DIR, "bids-examples")
- if not os.path.exists(example_dir):
- from git import Repo
-
- Repo.clone_from(
- "https://github.com/bids-standard/bids-examples.git", example_dir
- )
+ example_dir = Path(CPAC_DIR / "bids-examples")
+ if not example_dir.exists():
+ example_dir.symlink_to(bids_examples)
+ os.chdir(str(CPAC_DIR))
result = cli_runner.invoke(
CPAC_main_utils.commands[
_click_backport(CPAC_main_utils, "data-config")
].commands["build"],
- [os.path.join(DATA_DIR, "data_settings_bids_examples_ds051_default_BIDS.yml")],
+ [str(DATA_DIR / "data_settings_bids_examples_ds051_default_BIDS.yml")],
)
- participant_yaml = os.path.join(DATA_DIR, "data_config_ds051.yml")
- group_yaml = os.path.join(DATA_DIR, "group_analysis_participants_ds051.txt")
+ participant_yaml = DATA_DIR / "data_config_ds051.yml"
+ group_yaml = DATA_DIR / "group_analysis_participants_ds051.txt"
+
+ if example_dir.is_symlink() or example_dir.is_file():
+ example_dir.unlink()
+ else:
+ from shutil import rmtree
+ rmtree(example_dir)
assert result.exit_code == 0
assert "\n".join(caplog.messages).startswith(
"\nGenerating data configuration file.."
)
- assert os.path.exists(participant_yaml)
- assert os.path.exists(group_yaml)
+ assert participant_yaml.exists()
+ assert group_yaml.exists()
_delete_test_yaml(participant_yaml)
_delete_test_yaml(group_yaml)
def test_repickle(cli_runner): # noqa
fn = "python_2_pickle.pkl"
- pickle_path = os.path.join(DATA_DIR, fn)
+ pickle_path = str(DATA_DIR / fn)
backups = [_Backup(pickle_path), _Backup(f"{pickle_path}z")]
- result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [DATA_DIR])
+ result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [str(DATA_DIR)])
assert result.exit_code == 0
assert (
@@ -139,7 +139,7 @@ def test_repickle(cli_runner): # noqa
"pickle." in result.output
)
- result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [DATA_DIR])
+ result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [str(DATA_DIR)])
assert result.exit_code == 0
assert f"Pickle {fn} is a Python 3 pickle." in result.output
@@ -157,9 +157,10 @@ def restore(self):
w.write(self.data)
-def _delete_test_yaml(test_yaml):
- if os.path.exists(test_yaml):
- os.remove(test_yaml)
+def _delete_test_yaml(test_yaml: Path) -> None:
+ """Delete test YAML file."""
+ if test_yaml.exists():
+ os.remove(str(test_yaml))
def _test_repickle(pickle_path, gzipped=False):
diff --git a/dev/circleci_data/test_in_image.sh b/dev/circleci_data/test_in_image.sh
index b62de84994..9420d7c1ab 100755
--- a/dev/circleci_data/test_in_image.sh
+++ b/dev/circleci_data/test_in_image.sh
@@ -4,7 +4,7 @@ export PATH=$PATH:/home/$(whoami)/.local/bin
pip install -r /code/dev/circleci_data/requirements.txt
# run test with coverage as module
-python -m coverage run --include */CPAC/*,*/run.py,*/dev/docker_data/* -m pytest --ignore-glob=*test_install.py --junitxml=test-results/junit.xml --doctest-modules dev/circleci_data /code/CPAC
+python -m coverage run --include */CPAC/*,*/run.py,*/dev/docker_data/* -m pytest --ignore-glob=*test_install.py --capture=no --junitxml=test-results/junit.xml --doctest-modules dev/circleci_data /code/CPAC
echo "$?" > test-results/exitcode